diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,103601 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 14794, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 6.75949709341625e-05, + "grad_norm": 84.13553619384766, + "learning_rate": 0.0, + "loss": 0.78857421875, + "step": 1 + }, + { + "epoch": 0.000135189941868325, + "grad_norm": 80.1082763671875, + "learning_rate": 5.859375e-08, + "loss": 0.7568359375, + "step": 2 + }, + { + "epoch": 0.0002027849128024875, + "grad_norm": 91.41277313232422, + "learning_rate": 1.171875e-07, + "loss": 0.7646484375, + "step": 3 + }, + { + "epoch": 0.00027037988373665, + "grad_norm": 56.89262390136719, + "learning_rate": 1.7578125e-07, + "loss": 0.739990234375, + "step": 4 + }, + { + "epoch": 0.0003379748546708125, + "grad_norm": 70.60987854003906, + "learning_rate": 2.34375e-07, + "loss": 0.7451171875, + "step": 5 + }, + { + "epoch": 0.000405569825604975, + "grad_norm": 86.31484985351562, + "learning_rate": 2.9296875000000003e-07, + "loss": 0.754638671875, + "step": 6 + }, + { + "epoch": 0.0004731647965391375, + "grad_norm": 66.74529266357422, + "learning_rate": 3.515625e-07, + "loss": 0.744140625, + "step": 7 + }, + { + "epoch": 0.0005407597674733, + "grad_norm": 74.97181701660156, + "learning_rate": 4.1015625e-07, + "loss": 0.75146484375, + "step": 8 + }, + { + "epoch": 0.0006083547384074625, + "grad_norm": 86.72461700439453, + "learning_rate": 4.6875e-07, + "loss": 0.754638671875, + "step": 9 + }, + { + "epoch": 0.000675949709341625, + "grad_norm": 83.04508972167969, + "learning_rate": 5.2734375e-07, + "loss": 0.75244140625, + "step": 10 + }, + { + "epoch": 0.0007435446802757875, + "grad_norm": 90.59898376464844, + "learning_rate": 5.859375000000001e-07, + "loss": 0.75927734375, + "step": 11 + }, + { + "epoch": 0.00081113965120995, + "grad_norm": 94.52167510986328, + "learning_rate": 6.4453125e-07, + "loss": 0.760986328125, + "step": 12 + }, + { + "epoch": 0.0008787346221441124, + "grad_norm": 73.63838195800781, + "learning_rate": 7.03125e-07, + "loss": 0.74072265625, + "step": 13 + }, + { + "epoch": 0.000946329593078275, + "grad_norm": 82.92549896240234, + "learning_rate": 7.6171875e-07, + "loss": 0.74853515625, + "step": 14 + }, + { + "epoch": 0.0010139245640124375, + "grad_norm": 94.22863006591797, + "learning_rate": 8.203125e-07, + "loss": 0.752197265625, + "step": 15 + }, + { + "epoch": 0.0010815195349466, + "grad_norm": 94.93557739257812, + "learning_rate": 8.7890625e-07, + "loss": 0.749267578125, + "step": 16 + }, + { + "epoch": 0.0011491145058807625, + "grad_norm": 71.57659149169922, + "learning_rate": 9.375e-07, + "loss": 0.735595703125, + "step": 17 + }, + { + "epoch": 0.001216709476814925, + "grad_norm": 79.22907257080078, + "learning_rate": 9.9609375e-07, + "loss": 0.729248046875, + "step": 18 + }, + { + "epoch": 0.0012843044477490874, + "grad_norm": 70.3331298828125, + "learning_rate": 1.0546875e-06, + "loss": 0.726318359375, + "step": 19 + }, + { + "epoch": 0.00135189941868325, + "grad_norm": 78.46646881103516, + "learning_rate": 1.11328125e-06, + "loss": 0.726318359375, + "step": 20 + }, + { + "epoch": 0.0014194943896174124, + "grad_norm": 74.18952941894531, + "learning_rate": 1.1718750000000001e-06, + "loss": 0.72607421875, + "step": 21 + }, + { + "epoch": 0.001487089360551575, + "grad_norm": 86.75850677490234, + "learning_rate": 1.23046875e-06, + "loss": 0.7021484375, + "step": 22 + }, + { + "epoch": 0.0015546843314857375, + "grad_norm": 63.5222053527832, + "learning_rate": 1.2890625e-06, + "loss": 0.69873046875, + "step": 23 + }, + { + "epoch": 0.0016222793024199, + "grad_norm": 86.1124038696289, + "learning_rate": 1.34765625e-06, + "loss": 0.699951171875, + "step": 24 + }, + { + "epoch": 0.0016898742733540625, + "grad_norm": 66.45025634765625, + "learning_rate": 1.40625e-06, + "loss": 0.673828125, + "step": 25 + }, + { + "epoch": 0.0017574692442882249, + "grad_norm": 84.15271759033203, + "learning_rate": 1.46484375e-06, + "loss": 0.6640625, + "step": 26 + }, + { + "epoch": 0.0018250642152223874, + "grad_norm": 66.36346435546875, + "learning_rate": 1.5234375e-06, + "loss": 0.682373046875, + "step": 27 + }, + { + "epoch": 0.00189265918615655, + "grad_norm": 57.96862030029297, + "learning_rate": 1.5820312500000001e-06, + "loss": 0.666015625, + "step": 28 + }, + { + "epoch": 0.0019602541570907126, + "grad_norm": 77.31134796142578, + "learning_rate": 1.640625e-06, + "loss": 0.650146484375, + "step": 29 + }, + { + "epoch": 0.002027849128024875, + "grad_norm": 42.60896682739258, + "learning_rate": 1.69921875e-06, + "loss": 0.634033203125, + "step": 30 + }, + { + "epoch": 0.0020954440989590373, + "grad_norm": 47.93967819213867, + "learning_rate": 1.7578125e-06, + "loss": 0.61083984375, + "step": 31 + }, + { + "epoch": 0.0021630390698932, + "grad_norm": 68.33699798583984, + "learning_rate": 1.81640625e-06, + "loss": 0.564208984375, + "step": 32 + }, + { + "epoch": 0.0022306340408273623, + "grad_norm": 63.64045715332031, + "learning_rate": 1.875e-06, + "loss": 0.5703125, + "step": 33 + }, + { + "epoch": 0.002298229011761525, + "grad_norm": 64.88874053955078, + "learning_rate": 1.93359375e-06, + "loss": 0.527587890625, + "step": 34 + }, + { + "epoch": 0.0023658239826956874, + "grad_norm": 45.004852294921875, + "learning_rate": 1.9921875e-06, + "loss": 0.568359375, + "step": 35 + }, + { + "epoch": 0.00243341895362985, + "grad_norm": 49.235416412353516, + "learning_rate": 2.05078125e-06, + "loss": 0.5439453125, + "step": 36 + }, + { + "epoch": 0.0025010139245640124, + "grad_norm": 57.90715408325195, + "learning_rate": 2.109375e-06, + "loss": 0.508544921875, + "step": 37 + }, + { + "epoch": 0.0025686088954981747, + "grad_norm": 55.72802734375, + "learning_rate": 2.16796875e-06, + "loss": 0.50634765625, + "step": 38 + }, + { + "epoch": 0.0026362038664323375, + "grad_norm": 55.22311019897461, + "learning_rate": 2.2265625e-06, + "loss": 0.5001220703125, + "step": 39 + }, + { + "epoch": 0.0027037988373665, + "grad_norm": 30.505596160888672, + "learning_rate": 2.28515625e-06, + "loss": 0.53466796875, + "step": 40 + }, + { + "epoch": 0.0027713938083006625, + "grad_norm": 40.60301208496094, + "learning_rate": 2.3437500000000002e-06, + "loss": 0.441162109375, + "step": 41 + }, + { + "epoch": 0.002838988779234825, + "grad_norm": 29.32118034362793, + "learning_rate": 2.40234375e-06, + "loss": 0.4488525390625, + "step": 42 + }, + { + "epoch": 0.0029065837501689876, + "grad_norm": 37.16704177856445, + "learning_rate": 2.4609375e-06, + "loss": 0.3912353515625, + "step": 43 + }, + { + "epoch": 0.00297417872110315, + "grad_norm": 17.935800552368164, + "learning_rate": 2.5195312500000003e-06, + "loss": 0.4827880859375, + "step": 44 + }, + { + "epoch": 0.0030417736920373122, + "grad_norm": 37.79851150512695, + "learning_rate": 2.578125e-06, + "loss": 0.3553466796875, + "step": 45 + }, + { + "epoch": 0.003109368662971475, + "grad_norm": 38.569602966308594, + "learning_rate": 2.63671875e-06, + "loss": 0.33837890625, + "step": 46 + }, + { + "epoch": 0.0031769636339056373, + "grad_norm": 3.8150794506073, + "learning_rate": 2.6953125e-06, + "loss": 0.53662109375, + "step": 47 + }, + { + "epoch": 0.0032445586048398, + "grad_norm": 28.0899658203125, + "learning_rate": 2.75390625e-06, + "loss": 0.3699951171875, + "step": 48 + }, + { + "epoch": 0.0033121535757739623, + "grad_norm": 28.612751007080078, + "learning_rate": 2.8125e-06, + "loss": 0.3499755859375, + "step": 49 + }, + { + "epoch": 0.003379748546708125, + "grad_norm": 26.89038848876953, + "learning_rate": 2.87109375e-06, + "loss": 0.3507080078125, + "step": 50 + }, + { + "epoch": 0.0034473435176422874, + "grad_norm": 21.539562225341797, + "learning_rate": 2.9296875e-06, + "loss": 0.3743896484375, + "step": 51 + }, + { + "epoch": 0.0035149384885764497, + "grad_norm": 7.454199314117432, + "learning_rate": 2.9882812500000002e-06, + "loss": 0.462646484375, + "step": 52 + }, + { + "epoch": 0.0035825334595106124, + "grad_norm": 29.62181282043457, + "learning_rate": 3.046875e-06, + "loss": 0.274169921875, + "step": 53 + }, + { + "epoch": 0.0036501284304447748, + "grad_norm": 19.40483856201172, + "learning_rate": 3.10546875e-06, + "loss": 0.336669921875, + "step": 54 + }, + { + "epoch": 0.0037177234013789375, + "grad_norm": 8.735546112060547, + "learning_rate": 3.1640625000000003e-06, + "loss": 0.4091796875, + "step": 55 + }, + { + "epoch": 0.0037853183723131, + "grad_norm": 20.975584030151367, + "learning_rate": 3.22265625e-06, + "loss": 0.2823486328125, + "step": 56 + }, + { + "epoch": 0.0038529133432472626, + "grad_norm": 1.2495471239089966, + "learning_rate": 3.28125e-06, + "loss": 0.4541015625, + "step": 57 + }, + { + "epoch": 0.003920508314181425, + "grad_norm": 18.441896438598633, + "learning_rate": 3.3398437500000003e-06, + "loss": 0.28070068359375, + "step": 58 + }, + { + "epoch": 0.003988103285115587, + "grad_norm": 3.753969192504883, + "learning_rate": 3.3984375e-06, + "loss": 0.4713134765625, + "step": 59 + }, + { + "epoch": 0.00405569825604975, + "grad_norm": 9.129305839538574, + "learning_rate": 3.45703125e-06, + "loss": 0.3941650390625, + "step": 60 + }, + { + "epoch": 0.004123293226983913, + "grad_norm": 19.663448333740234, + "learning_rate": 3.515625e-06, + "loss": 0.3128662109375, + "step": 61 + }, + { + "epoch": 0.0041908881979180745, + "grad_norm": 7.594383716583252, + "learning_rate": 3.57421875e-06, + "loss": 0.48992919921875, + "step": 62 + }, + { + "epoch": 0.004258483168852237, + "grad_norm": 20.970163345336914, + "learning_rate": 3.6328125e-06, + "loss": 0.22454833984375, + "step": 63 + }, + { + "epoch": 0.0043260781397864, + "grad_norm": 7.247081279754639, + "learning_rate": 3.69140625e-06, + "loss": 0.36199951171875, + "step": 64 + }, + { + "epoch": 0.004393673110720563, + "grad_norm": 8.364009857177734, + "learning_rate": 3.75e-06, + "loss": 0.33203125, + "step": 65 + }, + { + "epoch": 0.004461268081654725, + "grad_norm": 13.92497730255127, + "learning_rate": 3.8085937500000002e-06, + "loss": 0.23455810546875, + "step": 66 + }, + { + "epoch": 0.004528863052588887, + "grad_norm": 5.543440341949463, + "learning_rate": 3.8671875e-06, + "loss": 0.40692138671875, + "step": 67 + }, + { + "epoch": 0.00459645802352305, + "grad_norm": 12.135955810546875, + "learning_rate": 3.92578125e-06, + "loss": 0.35260009765625, + "step": 68 + }, + { + "epoch": 0.004664052994457212, + "grad_norm": 6.634083271026611, + "learning_rate": 3.984375e-06, + "loss": 0.37969970703125, + "step": 69 + }, + { + "epoch": 0.004731647965391375, + "grad_norm": 3.551485776901245, + "learning_rate": 4.0429687500000004e-06, + "loss": 0.41851806640625, + "step": 70 + }, + { + "epoch": 0.0047992429363255375, + "grad_norm": 9.182619094848633, + "learning_rate": 4.1015625e-06, + "loss": 0.325469970703125, + "step": 71 + }, + { + "epoch": 0.0048668379072597, + "grad_norm": 9.989202499389648, + "learning_rate": 4.16015625e-06, + "loss": 0.26397705078125, + "step": 72 + }, + { + "epoch": 0.004934432878193862, + "grad_norm": 5.08259391784668, + "learning_rate": 4.21875e-06, + "loss": 0.4971923828125, + "step": 73 + }, + { + "epoch": 0.005002027849128025, + "grad_norm": 15.177289009094238, + "learning_rate": 4.27734375e-06, + "loss": 0.30303955078125, + "step": 74 + }, + { + "epoch": 0.005069622820062188, + "grad_norm": 8.712221145629883, + "learning_rate": 4.3359375e-06, + "loss": 0.27032470703125, + "step": 75 + }, + { + "epoch": 0.0051372177909963495, + "grad_norm": 14.464534759521484, + "learning_rate": 4.3945312500000005e-06, + "loss": 0.32122802734375, + "step": 76 + }, + { + "epoch": 0.005204812761930512, + "grad_norm": 15.904252052307129, + "learning_rate": 4.453125e-06, + "loss": 0.215545654296875, + "step": 77 + }, + { + "epoch": 0.005272407732864675, + "grad_norm": 12.820080757141113, + "learning_rate": 4.51171875e-06, + "loss": 0.37139892578125, + "step": 78 + }, + { + "epoch": 0.005340002703798838, + "grad_norm": 13.00821590423584, + "learning_rate": 4.5703125e-06, + "loss": 0.17889404296875, + "step": 79 + }, + { + "epoch": 0.005407597674733, + "grad_norm": 20.768110275268555, + "learning_rate": 4.62890625e-06, + "loss": 0.23175048828125, + "step": 80 + }, + { + "epoch": 0.005475192645667162, + "grad_norm": 16.972320556640625, + "learning_rate": 4.6875000000000004e-06, + "loss": 0.19781494140625, + "step": 81 + }, + { + "epoch": 0.005542787616601325, + "grad_norm": 20.61986541748047, + "learning_rate": 4.74609375e-06, + "loss": 0.2891845703125, + "step": 82 + }, + { + "epoch": 0.005610382587535487, + "grad_norm": 18.407934188842773, + "learning_rate": 4.8046875e-06, + "loss": 0.22216796875, + "step": 83 + }, + { + "epoch": 0.00567797755846965, + "grad_norm": 8.053304672241211, + "learning_rate": 4.86328125e-06, + "loss": 0.3997802734375, + "step": 84 + }, + { + "epoch": 0.0057455725294038125, + "grad_norm": 28.55884552001953, + "learning_rate": 4.921875e-06, + "loss": 0.24359130859375, + "step": 85 + }, + { + "epoch": 0.005813167500337975, + "grad_norm": 8.434625625610352, + "learning_rate": 4.98046875e-06, + "loss": 0.27423095703125, + "step": 86 + }, + { + "epoch": 0.005880762471272137, + "grad_norm": 43.32260513305664, + "learning_rate": 5.0390625000000005e-06, + "loss": 0.33221435546875, + "step": 87 + }, + { + "epoch": 0.0059483574422063, + "grad_norm": 28.449525833129883, + "learning_rate": 5.09765625e-06, + "loss": 0.43560791015625, + "step": 88 + }, + { + "epoch": 0.006015952413140463, + "grad_norm": 9.839138984680176, + "learning_rate": 5.15625e-06, + "loss": 0.233489990234375, + "step": 89 + }, + { + "epoch": 0.0060835473840746244, + "grad_norm": 20.638906478881836, + "learning_rate": 5.21484375e-06, + "loss": 0.38787841796875, + "step": 90 + }, + { + "epoch": 0.006151142355008787, + "grad_norm": 32.62936782836914, + "learning_rate": 5.2734375e-06, + "loss": 0.33233642578125, + "step": 91 + }, + { + "epoch": 0.00621873732594295, + "grad_norm": 40.349552154541016, + "learning_rate": 5.3320312500000004e-06, + "loss": 0.2601318359375, + "step": 92 + }, + { + "epoch": 0.006286332296877113, + "grad_norm": 8.056031227111816, + "learning_rate": 5.390625e-06, + "loss": 0.341552734375, + "step": 93 + }, + { + "epoch": 0.0063539272678112746, + "grad_norm": 10.3848237991333, + "learning_rate": 5.44921875e-06, + "loss": 0.29156494140625, + "step": 94 + }, + { + "epoch": 0.006421522238745437, + "grad_norm": 16.09223175048828, + "learning_rate": 5.5078125e-06, + "loss": 0.18524169921875, + "step": 95 + }, + { + "epoch": 0.0064891172096796, + "grad_norm": 12.529052734375, + "learning_rate": 5.56640625e-06, + "loss": 0.33050537109375, + "step": 96 + }, + { + "epoch": 0.006556712180613762, + "grad_norm": 16.260839462280273, + "learning_rate": 5.625e-06, + "loss": 0.14825439453125, + "step": 97 + }, + { + "epoch": 0.006624307151547925, + "grad_norm": 3.1753385066986084, + "learning_rate": 5.6835937500000005e-06, + "loss": 0.38385009765625, + "step": 98 + }, + { + "epoch": 0.006691902122482087, + "grad_norm": 6.3881425857543945, + "learning_rate": 5.7421875e-06, + "loss": 0.128448486328125, + "step": 99 + }, + { + "epoch": 0.00675949709341625, + "grad_norm": 24.457141876220703, + "learning_rate": 5.80078125e-06, + "loss": 0.37646484375, + "step": 100 + }, + { + "epoch": 0.006827092064350412, + "grad_norm": 17.43049430847168, + "learning_rate": 5.859375e-06, + "loss": 0.2811279296875, + "step": 101 + }, + { + "epoch": 0.006894687035284575, + "grad_norm": 9.845346450805664, + "learning_rate": 5.91796875e-06, + "loss": 0.2958984375, + "step": 102 + }, + { + "epoch": 0.0069622820062187375, + "grad_norm": 12.22458267211914, + "learning_rate": 5.9765625000000004e-06, + "loss": 0.4610595703125, + "step": 103 + }, + { + "epoch": 0.007029876977152899, + "grad_norm": 16.9343204498291, + "learning_rate": 6.03515625e-06, + "loss": 0.208099365234375, + "step": 104 + }, + { + "epoch": 0.007097471948087062, + "grad_norm": 8.883051872253418, + "learning_rate": 6.09375e-06, + "loss": 0.127685546875, + "step": 105 + }, + { + "epoch": 0.007165066919021225, + "grad_norm": 10.74145221710205, + "learning_rate": 6.15234375e-06, + "loss": 0.31689453125, + "step": 106 + }, + { + "epoch": 0.007232661889955388, + "grad_norm": 21.377145767211914, + "learning_rate": 6.2109375e-06, + "loss": 0.213134765625, + "step": 107 + }, + { + "epoch": 0.0073002568608895495, + "grad_norm": 9.589807510375977, + "learning_rate": 6.26953125e-06, + "loss": 0.3040771484375, + "step": 108 + }, + { + "epoch": 0.007367851831823712, + "grad_norm": 39.94683074951172, + "learning_rate": 6.3281250000000005e-06, + "loss": 0.607177734375, + "step": 109 + }, + { + "epoch": 0.007435446802757875, + "grad_norm": 3.6789066791534424, + "learning_rate": 6.38671875e-06, + "loss": 0.3328857421875, + "step": 110 + }, + { + "epoch": 0.007503041773692038, + "grad_norm": 6.084640979766846, + "learning_rate": 6.4453125e-06, + "loss": 0.3369140625, + "step": 111 + }, + { + "epoch": 0.0075706367446262, + "grad_norm": 6.873464584350586, + "learning_rate": 6.50390625e-06, + "loss": 0.35394287109375, + "step": 112 + }, + { + "epoch": 0.007638231715560362, + "grad_norm": 6.129619598388672, + "learning_rate": 6.5625e-06, + "loss": 0.35980224609375, + "step": 113 + }, + { + "epoch": 0.007705826686494525, + "grad_norm": 4.86572790145874, + "learning_rate": 6.6210937500000004e-06, + "loss": 0.28240966796875, + "step": 114 + }, + { + "epoch": 0.007773421657428687, + "grad_norm": 27.30777931213379, + "learning_rate": 6.679687500000001e-06, + "loss": 0.183837890625, + "step": 115 + }, + { + "epoch": 0.00784101662836285, + "grad_norm": 14.20505428314209, + "learning_rate": 6.73828125e-06, + "loss": 0.3028564453125, + "step": 116 + }, + { + "epoch": 0.007908611599297012, + "grad_norm": 5.6076884269714355, + "learning_rate": 6.796875e-06, + "loss": 0.1405029296875, + "step": 117 + }, + { + "epoch": 0.007976206570231174, + "grad_norm": 23.48346710205078, + "learning_rate": 6.85546875e-06, + "loss": 0.52008056640625, + "step": 118 + }, + { + "epoch": 0.008043801541165338, + "grad_norm": 9.63930892944336, + "learning_rate": 6.9140625e-06, + "loss": 0.101409912109375, + "step": 119 + }, + { + "epoch": 0.0081113965120995, + "grad_norm": 7.81471061706543, + "learning_rate": 6.9726562500000005e-06, + "loss": 0.19256591796875, + "step": 120 + }, + { + "epoch": 0.008178991483033662, + "grad_norm": 6.092757225036621, + "learning_rate": 7.03125e-06, + "loss": 0.3804931640625, + "step": 121 + }, + { + "epoch": 0.008246586453967825, + "grad_norm": 4.345539093017578, + "learning_rate": 7.08984375e-06, + "loss": 0.163421630859375, + "step": 122 + }, + { + "epoch": 0.008314181424901987, + "grad_norm": 6.06764554977417, + "learning_rate": 7.1484375e-06, + "loss": 0.43292236328125, + "step": 123 + }, + { + "epoch": 0.008381776395836149, + "grad_norm": 32.519405364990234, + "learning_rate": 7.20703125e-06, + "loss": 0.39874267578125, + "step": 124 + }, + { + "epoch": 0.008449371366770313, + "grad_norm": 36.39859390258789, + "learning_rate": 7.265625e-06, + "loss": 0.4112548828125, + "step": 125 + }, + { + "epoch": 0.008516966337704475, + "grad_norm": 23.753530502319336, + "learning_rate": 7.3242187500000006e-06, + "loss": 0.326385498046875, + "step": 126 + }, + { + "epoch": 0.008584561308638636, + "grad_norm": 24.39263916015625, + "learning_rate": 7.3828125e-06, + "loss": 0.33905029296875, + "step": 127 + }, + { + "epoch": 0.0086521562795728, + "grad_norm": 41.77408981323242, + "learning_rate": 7.44140625e-06, + "loss": 0.3780517578125, + "step": 128 + }, + { + "epoch": 0.008719751250506962, + "grad_norm": 35.98897171020508, + "learning_rate": 7.5e-06, + "loss": 0.29071044921875, + "step": 129 + }, + { + "epoch": 0.008787346221441126, + "grad_norm": 9.065729141235352, + "learning_rate": 7.55859375e-06, + "loss": 0.357391357421875, + "step": 130 + }, + { + "epoch": 0.008854941192375287, + "grad_norm": 14.883200645446777, + "learning_rate": 7.6171875000000005e-06, + "loss": 0.2353515625, + "step": 131 + }, + { + "epoch": 0.00892253616330945, + "grad_norm": 9.871024131774902, + "learning_rate": 7.67578125e-06, + "loss": 0.167694091796875, + "step": 132 + }, + { + "epoch": 0.008990131134243613, + "grad_norm": 3.6580350399017334, + "learning_rate": 7.734375e-06, + "loss": 0.414794921875, + "step": 133 + }, + { + "epoch": 0.009057726105177775, + "grad_norm": 8.253684997558594, + "learning_rate": 7.792968750000001e-06, + "loss": 0.3365478515625, + "step": 134 + }, + { + "epoch": 0.009125321076111937, + "grad_norm": 10.366223335266113, + "learning_rate": 7.8515625e-06, + "loss": 0.154541015625, + "step": 135 + }, + { + "epoch": 0.0091929160470461, + "grad_norm": 14.36552619934082, + "learning_rate": 7.91015625e-06, + "loss": 0.364105224609375, + "step": 136 + }, + { + "epoch": 0.009260511017980262, + "grad_norm": 24.167543411254883, + "learning_rate": 7.96875e-06, + "loss": 0.213409423828125, + "step": 137 + }, + { + "epoch": 0.009328105988914424, + "grad_norm": 14.960211753845215, + "learning_rate": 8.02734375e-06, + "loss": 0.208221435546875, + "step": 138 + }, + { + "epoch": 0.009395700959848588, + "grad_norm": 10.89179515838623, + "learning_rate": 8.085937500000001e-06, + "loss": 0.28765869140625, + "step": 139 + }, + { + "epoch": 0.00946329593078275, + "grad_norm": 19.776676177978516, + "learning_rate": 8.14453125e-06, + "loss": 0.27313232421875, + "step": 140 + }, + { + "epoch": 0.009530890901716911, + "grad_norm": 8.036362648010254, + "learning_rate": 8.203125e-06, + "loss": 0.16204833984375, + "step": 141 + }, + { + "epoch": 0.009598485872651075, + "grad_norm": 28.314735412597656, + "learning_rate": 8.26171875e-06, + "loss": 0.3685302734375, + "step": 142 + }, + { + "epoch": 0.009666080843585237, + "grad_norm": 10.628405570983887, + "learning_rate": 8.3203125e-06, + "loss": 0.160369873046875, + "step": 143 + }, + { + "epoch": 0.0097336758145194, + "grad_norm": 12.793607711791992, + "learning_rate": 8.37890625e-06, + "loss": 0.290130615234375, + "step": 144 + }, + { + "epoch": 0.009801270785453562, + "grad_norm": 10.363785743713379, + "learning_rate": 8.4375e-06, + "loss": 0.292510986328125, + "step": 145 + }, + { + "epoch": 0.009868865756387724, + "grad_norm": 10.115606307983398, + "learning_rate": 8.49609375e-06, + "loss": 0.222808837890625, + "step": 146 + }, + { + "epoch": 0.009936460727321888, + "grad_norm": 22.626766204833984, + "learning_rate": 8.5546875e-06, + "loss": 0.116790771484375, + "step": 147 + }, + { + "epoch": 0.01000405569825605, + "grad_norm": 5.834292888641357, + "learning_rate": 8.61328125e-06, + "loss": 0.283172607421875, + "step": 148 + }, + { + "epoch": 0.010071650669190212, + "grad_norm": 4.886358737945557, + "learning_rate": 8.671875e-06, + "loss": 0.068572998046875, + "step": 149 + }, + { + "epoch": 0.010139245640124375, + "grad_norm": 8.676565170288086, + "learning_rate": 8.73046875e-06, + "loss": 0.23089599609375, + "step": 150 + }, + { + "epoch": 0.010206840611058537, + "grad_norm": 3.529041051864624, + "learning_rate": 8.789062500000001e-06, + "loss": 0.144927978515625, + "step": 151 + }, + { + "epoch": 0.010274435581992699, + "grad_norm": 7.559211254119873, + "learning_rate": 8.84765625e-06, + "loss": 0.172027587890625, + "step": 152 + }, + { + "epoch": 0.010342030552926863, + "grad_norm": 4.678627967834473, + "learning_rate": 8.90625e-06, + "loss": 0.328857421875, + "step": 153 + }, + { + "epoch": 0.010409625523861024, + "grad_norm": 6.246919631958008, + "learning_rate": 8.96484375e-06, + "loss": 0.311920166015625, + "step": 154 + }, + { + "epoch": 0.010477220494795188, + "grad_norm": 4.6827778816223145, + "learning_rate": 9.0234375e-06, + "loss": 0.177825927734375, + "step": 155 + }, + { + "epoch": 0.01054481546572935, + "grad_norm": 12.54141902923584, + "learning_rate": 9.082031250000001e-06, + "loss": 0.241180419921875, + "step": 156 + }, + { + "epoch": 0.010612410436663512, + "grad_norm": 5.541639804840088, + "learning_rate": 9.140625e-06, + "loss": 0.2130279541015625, + "step": 157 + }, + { + "epoch": 0.010680005407597675, + "grad_norm": 21.799394607543945, + "learning_rate": 9.19921875e-06, + "loss": 0.2818603515625, + "step": 158 + }, + { + "epoch": 0.010747600378531837, + "grad_norm": 28.697420120239258, + "learning_rate": 9.2578125e-06, + "loss": 0.291259765625, + "step": 159 + }, + { + "epoch": 0.010815195349466, + "grad_norm": 10.662663459777832, + "learning_rate": 9.31640625e-06, + "loss": 0.4346923828125, + "step": 160 + }, + { + "epoch": 0.010882790320400163, + "grad_norm": 19.423067092895508, + "learning_rate": 9.375000000000001e-06, + "loss": 0.350860595703125, + "step": 161 + }, + { + "epoch": 0.010950385291334325, + "grad_norm": 22.509904861450195, + "learning_rate": 9.43359375e-06, + "loss": 0.40582275390625, + "step": 162 + }, + { + "epoch": 0.011017980262268487, + "grad_norm": 13.629359245300293, + "learning_rate": 9.4921875e-06, + "loss": 0.264495849609375, + "step": 163 + }, + { + "epoch": 0.01108557523320265, + "grad_norm": 9.048948287963867, + "learning_rate": 9.55078125e-06, + "loss": 0.406494140625, + "step": 164 + }, + { + "epoch": 0.011153170204136812, + "grad_norm": 12.351283073425293, + "learning_rate": 9.609375e-06, + "loss": 0.286895751953125, + "step": 165 + }, + { + "epoch": 0.011220765175070974, + "grad_norm": 4.786278247833252, + "learning_rate": 9.66796875e-06, + "loss": 0.2571258544921875, + "step": 166 + }, + { + "epoch": 0.011288360146005138, + "grad_norm": 7.153155326843262, + "learning_rate": 9.7265625e-06, + "loss": 0.20550537109375, + "step": 167 + }, + { + "epoch": 0.0113559551169393, + "grad_norm": 17.428773880004883, + "learning_rate": 9.78515625e-06, + "loss": 0.248779296875, + "step": 168 + }, + { + "epoch": 0.011423550087873463, + "grad_norm": 31.437950134277344, + "learning_rate": 9.84375e-06, + "loss": 0.3345489501953125, + "step": 169 + }, + { + "epoch": 0.011491145058807625, + "grad_norm": 5.1431965827941895, + "learning_rate": 9.90234375e-06, + "loss": 0.0829010009765625, + "step": 170 + }, + { + "epoch": 0.011558740029741787, + "grad_norm": 17.71570587158203, + "learning_rate": 9.9609375e-06, + "loss": 0.19110107421875, + "step": 171 + }, + { + "epoch": 0.01162633500067595, + "grad_norm": 25.76549530029297, + "learning_rate": 1.001953125e-05, + "loss": 0.229278564453125, + "step": 172 + }, + { + "epoch": 0.011693929971610112, + "grad_norm": 17.384584426879883, + "learning_rate": 1.0078125000000001e-05, + "loss": 0.32861328125, + "step": 173 + }, + { + "epoch": 0.011761524942544274, + "grad_norm": 8.58968734741211, + "learning_rate": 1.013671875e-05, + "loss": 0.273406982421875, + "step": 174 + }, + { + "epoch": 0.011829119913478438, + "grad_norm": 8.57683277130127, + "learning_rate": 1.01953125e-05, + "loss": 0.186859130859375, + "step": 175 + }, + { + "epoch": 0.0118967148844126, + "grad_norm": 10.398244857788086, + "learning_rate": 1.025390625e-05, + "loss": 0.18975830078125, + "step": 176 + }, + { + "epoch": 0.011964309855346762, + "grad_norm": 13.167986869812012, + "learning_rate": 1.03125e-05, + "loss": 0.285888671875, + "step": 177 + }, + { + "epoch": 0.012031904826280925, + "grad_norm": 3.3726272583007812, + "learning_rate": 1.0371093750000001e-05, + "loss": 0.15338134765625, + "step": 178 + }, + { + "epoch": 0.012099499797215087, + "grad_norm": 16.551237106323242, + "learning_rate": 1.04296875e-05, + "loss": 0.35382080078125, + "step": 179 + }, + { + "epoch": 0.012167094768149249, + "grad_norm": 14.98691177368164, + "learning_rate": 1.048828125e-05, + "loss": 0.2234039306640625, + "step": 180 + }, + { + "epoch": 0.012234689739083413, + "grad_norm": 9.034643173217773, + "learning_rate": 1.0546875e-05, + "loss": 0.36334228515625, + "step": 181 + }, + { + "epoch": 0.012302284710017574, + "grad_norm": 5.315864086151123, + "learning_rate": 1.060546875e-05, + "loss": 0.34356689453125, + "step": 182 + }, + { + "epoch": 0.012369879680951738, + "grad_norm": 5.760006904602051, + "learning_rate": 1.0664062500000001e-05, + "loss": 0.219085693359375, + "step": 183 + }, + { + "epoch": 0.0124374746518859, + "grad_norm": 10.594557762145996, + "learning_rate": 1.072265625e-05, + "loss": 0.202301025390625, + "step": 184 + }, + { + "epoch": 0.012505069622820062, + "grad_norm": 25.992443084716797, + "learning_rate": 1.078125e-05, + "loss": 0.414794921875, + "step": 185 + }, + { + "epoch": 0.012572664593754225, + "grad_norm": 4.990076541900635, + "learning_rate": 1.083984375e-05, + "loss": 0.129425048828125, + "step": 186 + }, + { + "epoch": 0.012640259564688387, + "grad_norm": 2.63018536567688, + "learning_rate": 1.08984375e-05, + "loss": 0.16583251953125, + "step": 187 + }, + { + "epoch": 0.012707854535622549, + "grad_norm": 2.4022207260131836, + "learning_rate": 1.095703125e-05, + "loss": 0.06262969970703125, + "step": 188 + }, + { + "epoch": 0.012775449506556713, + "grad_norm": 8.01939582824707, + "learning_rate": 1.1015625e-05, + "loss": 0.27252197265625, + "step": 189 + }, + { + "epoch": 0.012843044477490875, + "grad_norm": 4.417593479156494, + "learning_rate": 1.1074218750000001e-05, + "loss": 0.087799072265625, + "step": 190 + }, + { + "epoch": 0.012910639448425036, + "grad_norm": 23.585046768188477, + "learning_rate": 1.11328125e-05, + "loss": 0.129058837890625, + "step": 191 + }, + { + "epoch": 0.0129782344193592, + "grad_norm": 18.312528610229492, + "learning_rate": 1.119140625e-05, + "loss": 0.30401611328125, + "step": 192 + }, + { + "epoch": 0.013045829390293362, + "grad_norm": 15.939261436462402, + "learning_rate": 1.125e-05, + "loss": 0.12371826171875, + "step": 193 + }, + { + "epoch": 0.013113424361227524, + "grad_norm": 8.556756019592285, + "learning_rate": 1.130859375e-05, + "loss": 0.1912841796875, + "step": 194 + }, + { + "epoch": 0.013181019332161687, + "grad_norm": 7.09650993347168, + "learning_rate": 1.1367187500000001e-05, + "loss": 0.1513671875, + "step": 195 + }, + { + "epoch": 0.01324861430309585, + "grad_norm": 4.582852363586426, + "learning_rate": 1.142578125e-05, + "loss": 0.352294921875, + "step": 196 + }, + { + "epoch": 0.013316209274030013, + "grad_norm": 24.764942169189453, + "learning_rate": 1.1484375e-05, + "loss": 0.497314453125, + "step": 197 + }, + { + "epoch": 0.013383804244964175, + "grad_norm": 14.98612117767334, + "learning_rate": 1.154296875e-05, + "loss": 0.3841552734375, + "step": 198 + }, + { + "epoch": 0.013451399215898337, + "grad_norm": 10.298518180847168, + "learning_rate": 1.16015625e-05, + "loss": 0.4228515625, + "step": 199 + }, + { + "epoch": 0.0135189941868325, + "grad_norm": 18.445411682128906, + "learning_rate": 1.1660156250000001e-05, + "loss": 0.146270751953125, + "step": 200 + }, + { + "epoch": 0.013586589157766662, + "grad_norm": 23.52071189880371, + "learning_rate": 1.171875e-05, + "loss": 0.28955078125, + "step": 201 + }, + { + "epoch": 0.013654184128700824, + "grad_norm": 9.860815048217773, + "learning_rate": 1.177734375e-05, + "loss": 0.25103759765625, + "step": 202 + }, + { + "epoch": 0.013721779099634988, + "grad_norm": 15.70622444152832, + "learning_rate": 1.18359375e-05, + "loss": 0.1109771728515625, + "step": 203 + }, + { + "epoch": 0.01378937407056915, + "grad_norm": 23.44267463684082, + "learning_rate": 1.189453125e-05, + "loss": 0.1806182861328125, + "step": 204 + }, + { + "epoch": 0.013856969041503311, + "grad_norm": 15.646227836608887, + "learning_rate": 1.1953125000000001e-05, + "loss": 0.193511962890625, + "step": 205 + }, + { + "epoch": 0.013924564012437475, + "grad_norm": 6.322649002075195, + "learning_rate": 1.201171875e-05, + "loss": 0.267669677734375, + "step": 206 + }, + { + "epoch": 0.013992158983371637, + "grad_norm": 14.866901397705078, + "learning_rate": 1.20703125e-05, + "loss": 0.34185791015625, + "step": 207 + }, + { + "epoch": 0.014059753954305799, + "grad_norm": 20.53168487548828, + "learning_rate": 1.212890625e-05, + "loss": 0.41168212890625, + "step": 208 + }, + { + "epoch": 0.014127348925239962, + "grad_norm": 27.044822692871094, + "learning_rate": 1.21875e-05, + "loss": 0.35272216796875, + "step": 209 + }, + { + "epoch": 0.014194943896174124, + "grad_norm": 10.513531684875488, + "learning_rate": 1.224609375e-05, + "loss": 0.36285400390625, + "step": 210 + }, + { + "epoch": 0.014262538867108288, + "grad_norm": 11.403712272644043, + "learning_rate": 1.23046875e-05, + "loss": 0.159210205078125, + "step": 211 + }, + { + "epoch": 0.01433013383804245, + "grad_norm": 13.077483177185059, + "learning_rate": 1.2363281250000001e-05, + "loss": 0.3470458984375, + "step": 212 + }, + { + "epoch": 0.014397728808976612, + "grad_norm": 10.310188293457031, + "learning_rate": 1.2421875e-05, + "loss": 0.1960296630859375, + "step": 213 + }, + { + "epoch": 0.014465323779910775, + "grad_norm": 5.740678787231445, + "learning_rate": 1.248046875e-05, + "loss": 0.2139739990234375, + "step": 214 + }, + { + "epoch": 0.014532918750844937, + "grad_norm": 7.069823741912842, + "learning_rate": 1.25390625e-05, + "loss": 0.2166290283203125, + "step": 215 + }, + { + "epoch": 0.014600513721779099, + "grad_norm": 2.5440268516540527, + "learning_rate": 1.259765625e-05, + "loss": 0.086883544921875, + "step": 216 + }, + { + "epoch": 0.014668108692713263, + "grad_norm": 5.248499393463135, + "learning_rate": 1.2656250000000001e-05, + "loss": 0.23419189453125, + "step": 217 + }, + { + "epoch": 0.014735703663647425, + "grad_norm": 4.85791015625, + "learning_rate": 1.271484375e-05, + "loss": 0.10382080078125, + "step": 218 + }, + { + "epoch": 0.014803298634581586, + "grad_norm": 9.530184745788574, + "learning_rate": 1.27734375e-05, + "loss": 0.2280426025390625, + "step": 219 + }, + { + "epoch": 0.01487089360551575, + "grad_norm": 22.10040283203125, + "learning_rate": 1.283203125e-05, + "loss": 0.44354248046875, + "step": 220 + }, + { + "epoch": 0.014938488576449912, + "grad_norm": 8.19511890411377, + "learning_rate": 1.2890625e-05, + "loss": 0.12213134765625, + "step": 221 + }, + { + "epoch": 0.015006083547384075, + "grad_norm": 21.826244354248047, + "learning_rate": 1.2949218750000001e-05, + "loss": 0.301910400390625, + "step": 222 + }, + { + "epoch": 0.015073678518318237, + "grad_norm": 9.318695068359375, + "learning_rate": 1.30078125e-05, + "loss": 0.1735687255859375, + "step": 223 + }, + { + "epoch": 0.0151412734892524, + "grad_norm": 12.734342575073242, + "learning_rate": 1.306640625e-05, + "loss": 0.230560302734375, + "step": 224 + }, + { + "epoch": 0.015208868460186563, + "grad_norm": 8.291314125061035, + "learning_rate": 1.3125e-05, + "loss": 0.20549774169921875, + "step": 225 + }, + { + "epoch": 0.015276463431120725, + "grad_norm": 2.5374932289123535, + "learning_rate": 1.318359375e-05, + "loss": 0.0973968505859375, + "step": 226 + }, + { + "epoch": 0.015344058402054887, + "grad_norm": 11.822149276733398, + "learning_rate": 1.3242187500000001e-05, + "loss": 0.28619384765625, + "step": 227 + }, + { + "epoch": 0.01541165337298905, + "grad_norm": 2.8807249069213867, + "learning_rate": 1.330078125e-05, + "loss": 0.10540771484375, + "step": 228 + }, + { + "epoch": 0.015479248343923212, + "grad_norm": 4.158577919006348, + "learning_rate": 1.3359375000000001e-05, + "loss": 0.2145538330078125, + "step": 229 + }, + { + "epoch": 0.015546843314857374, + "grad_norm": 6.464020729064941, + "learning_rate": 1.341796875e-05, + "loss": 0.1768798828125, + "step": 230 + }, + { + "epoch": 0.015614438285791538, + "grad_norm": 3.1151866912841797, + "learning_rate": 1.34765625e-05, + "loss": 0.118316650390625, + "step": 231 + }, + { + "epoch": 0.0156820332567257, + "grad_norm": 14.999964714050293, + "learning_rate": 1.353515625e-05, + "loss": 0.18001556396484375, + "step": 232 + }, + { + "epoch": 0.01574962822765986, + "grad_norm": 33.79302978515625, + "learning_rate": 1.359375e-05, + "loss": 0.400421142578125, + "step": 233 + }, + { + "epoch": 0.015817223198594025, + "grad_norm": 7.522545337677002, + "learning_rate": 1.3652343750000001e-05, + "loss": 0.181793212890625, + "step": 234 + }, + { + "epoch": 0.01588481816952819, + "grad_norm": 10.539345741271973, + "learning_rate": 1.37109375e-05, + "loss": 0.23944091796875, + "step": 235 + }, + { + "epoch": 0.01595241314046235, + "grad_norm": 4.608603477478027, + "learning_rate": 1.376953125e-05, + "loss": 0.130859375, + "step": 236 + }, + { + "epoch": 0.016020008111396512, + "grad_norm": 3.9855220317840576, + "learning_rate": 1.3828125e-05, + "loss": 0.2270660400390625, + "step": 237 + }, + { + "epoch": 0.016087603082330676, + "grad_norm": 1.502336859703064, + "learning_rate": 1.388671875e-05, + "loss": 0.0856781005859375, + "step": 238 + }, + { + "epoch": 0.016155198053264836, + "grad_norm": 4.668871879577637, + "learning_rate": 1.3945312500000001e-05, + "loss": 0.2152557373046875, + "step": 239 + }, + { + "epoch": 0.016222793024199, + "grad_norm": 6.147217273712158, + "learning_rate": 1.400390625e-05, + "loss": 0.2525787353515625, + "step": 240 + }, + { + "epoch": 0.016290387995133163, + "grad_norm": 11.78864860534668, + "learning_rate": 1.40625e-05, + "loss": 0.29144287109375, + "step": 241 + }, + { + "epoch": 0.016357982966067323, + "grad_norm": 3.2561838626861572, + "learning_rate": 1.412109375e-05, + "loss": 0.2051544189453125, + "step": 242 + }, + { + "epoch": 0.016425577937001487, + "grad_norm": 8.274458885192871, + "learning_rate": 1.41796875e-05, + "loss": 0.42156982421875, + "step": 243 + }, + { + "epoch": 0.01649317290793565, + "grad_norm": 4.974515438079834, + "learning_rate": 1.4238281250000001e-05, + "loss": 0.250732421875, + "step": 244 + }, + { + "epoch": 0.01656076787886981, + "grad_norm": 4.920560836791992, + "learning_rate": 1.4296875e-05, + "loss": 0.3739013671875, + "step": 245 + }, + { + "epoch": 0.016628362849803974, + "grad_norm": 13.598729133605957, + "learning_rate": 1.435546875e-05, + "loss": 0.299163818359375, + "step": 246 + }, + { + "epoch": 0.016695957820738138, + "grad_norm": 3.082841634750366, + "learning_rate": 1.44140625e-05, + "loss": 0.195281982421875, + "step": 247 + }, + { + "epoch": 0.016763552791672298, + "grad_norm": 15.92171573638916, + "learning_rate": 1.447265625e-05, + "loss": 0.30712890625, + "step": 248 + }, + { + "epoch": 0.016831147762606462, + "grad_norm": 18.186906814575195, + "learning_rate": 1.453125e-05, + "loss": 0.5130615234375, + "step": 249 + }, + { + "epoch": 0.016898742733540625, + "grad_norm": 8.228699684143066, + "learning_rate": 1.458984375e-05, + "loss": 0.33929443359375, + "step": 250 + }, + { + "epoch": 0.016966337704474786, + "grad_norm": 10.38945484161377, + "learning_rate": 1.4648437500000001e-05, + "loss": 0.25732421875, + "step": 251 + }, + { + "epoch": 0.01703393267540895, + "grad_norm": 19.27303123474121, + "learning_rate": 1.470703125e-05, + "loss": 0.4246826171875, + "step": 252 + }, + { + "epoch": 0.017101527646343113, + "grad_norm": 2.799294948577881, + "learning_rate": 1.4765625e-05, + "loss": 0.139129638671875, + "step": 253 + }, + { + "epoch": 0.017169122617277273, + "grad_norm": 5.991629600524902, + "learning_rate": 1.482421875e-05, + "loss": 0.37841796875, + "step": 254 + }, + { + "epoch": 0.017236717588211437, + "grad_norm": 3.190122604370117, + "learning_rate": 1.48828125e-05, + "loss": 0.09003829956054688, + "step": 255 + }, + { + "epoch": 0.0173043125591456, + "grad_norm": 5.990656852722168, + "learning_rate": 1.4941406250000001e-05, + "loss": 0.39031982421875, + "step": 256 + }, + { + "epoch": 0.017371907530079764, + "grad_norm": 4.271067142486572, + "learning_rate": 1.5e-05, + "loss": 0.33660888671875, + "step": 257 + }, + { + "epoch": 0.017439502501013924, + "grad_norm": 2.5699729919433594, + "learning_rate": 1.505859375e-05, + "loss": 0.18414306640625, + "step": 258 + }, + { + "epoch": 0.017507097471948087, + "grad_norm": 5.564748287200928, + "learning_rate": 1.51171875e-05, + "loss": 0.30621337890625, + "step": 259 + }, + { + "epoch": 0.01757469244288225, + "grad_norm": 5.095725059509277, + "learning_rate": 1.517578125e-05, + "loss": 0.170684814453125, + "step": 260 + }, + { + "epoch": 0.01764228741381641, + "grad_norm": 7.978644371032715, + "learning_rate": 1.5234375000000001e-05, + "loss": 0.1652679443359375, + "step": 261 + }, + { + "epoch": 0.017709882384750575, + "grad_norm": 3.946362257003784, + "learning_rate": 1.529296875e-05, + "loss": 0.074371337890625, + "step": 262 + }, + { + "epoch": 0.01777747735568474, + "grad_norm": 12.518738746643066, + "learning_rate": 1.53515625e-05, + "loss": 0.38153076171875, + "step": 263 + }, + { + "epoch": 0.0178450723266189, + "grad_norm": 7.270688533782959, + "learning_rate": 1.541015625e-05, + "loss": 0.232086181640625, + "step": 264 + }, + { + "epoch": 0.017912667297553062, + "grad_norm": 5.515626430511475, + "learning_rate": 1.546875e-05, + "loss": 0.21295166015625, + "step": 265 + }, + { + "epoch": 0.017980262268487226, + "grad_norm": 30.261770248413086, + "learning_rate": 1.552734375e-05, + "loss": 0.38824462890625, + "step": 266 + }, + { + "epoch": 0.018047857239421386, + "grad_norm": 9.709709167480469, + "learning_rate": 1.5585937500000002e-05, + "loss": 0.1822509765625, + "step": 267 + }, + { + "epoch": 0.01811545221035555, + "grad_norm": 2.342224359512329, + "learning_rate": 1.564453125e-05, + "loss": 0.1247711181640625, + "step": 268 + }, + { + "epoch": 0.018183047181289713, + "grad_norm": 7.883246898651123, + "learning_rate": 1.5703125e-05, + "loss": 0.2305908203125, + "step": 269 + }, + { + "epoch": 0.018250642152223873, + "grad_norm": 6.995165824890137, + "learning_rate": 1.576171875e-05, + "loss": 0.1698150634765625, + "step": 270 + }, + { + "epoch": 0.018318237123158037, + "grad_norm": 3.614910840988159, + "learning_rate": 1.58203125e-05, + "loss": 0.19525146484375, + "step": 271 + }, + { + "epoch": 0.0183858320940922, + "grad_norm": 9.818486213684082, + "learning_rate": 1.587890625e-05, + "loss": 0.17901611328125, + "step": 272 + }, + { + "epoch": 0.01845342706502636, + "grad_norm": 8.379522323608398, + "learning_rate": 1.59375e-05, + "loss": 0.201202392578125, + "step": 273 + }, + { + "epoch": 0.018521022035960524, + "grad_norm": 5.147401809692383, + "learning_rate": 1.599609375e-05, + "loss": 0.1692047119140625, + "step": 274 + }, + { + "epoch": 0.018588617006894688, + "grad_norm": 2.854189395904541, + "learning_rate": 1.60546875e-05, + "loss": 0.0820159912109375, + "step": 275 + }, + { + "epoch": 0.018656211977828848, + "grad_norm": 9.062012672424316, + "learning_rate": 1.611328125e-05, + "loss": 0.1484375, + "step": 276 + }, + { + "epoch": 0.01872380694876301, + "grad_norm": 8.157903671264648, + "learning_rate": 1.6171875000000002e-05, + "loss": 0.09213638305664062, + "step": 277 + }, + { + "epoch": 0.018791401919697175, + "grad_norm": 3.187164545059204, + "learning_rate": 1.623046875e-05, + "loss": 0.094757080078125, + "step": 278 + }, + { + "epoch": 0.018858996890631335, + "grad_norm": 5.700836181640625, + "learning_rate": 1.62890625e-05, + "loss": 0.254852294921875, + "step": 279 + }, + { + "epoch": 0.0189265918615655, + "grad_norm": 12.440113067626953, + "learning_rate": 1.634765625e-05, + "loss": 0.2650146484375, + "step": 280 + }, + { + "epoch": 0.018994186832499663, + "grad_norm": 7.842966556549072, + "learning_rate": 1.640625e-05, + "loss": 0.10101318359375, + "step": 281 + }, + { + "epoch": 0.019061781803433823, + "grad_norm": 6.115520477294922, + "learning_rate": 1.646484375e-05, + "loss": 0.09857177734375, + "step": 282 + }, + { + "epoch": 0.019129376774367986, + "grad_norm": 7.102740287780762, + "learning_rate": 1.65234375e-05, + "loss": 0.2293853759765625, + "step": 283 + }, + { + "epoch": 0.01919697174530215, + "grad_norm": 17.67252540588379, + "learning_rate": 1.6582031250000002e-05, + "loss": 0.22613525390625, + "step": 284 + }, + { + "epoch": 0.019264566716236314, + "grad_norm": 11.403902053833008, + "learning_rate": 1.6640625e-05, + "loss": 0.25860595703125, + "step": 285 + }, + { + "epoch": 0.019332161687170474, + "grad_norm": 8.753812789916992, + "learning_rate": 1.669921875e-05, + "loss": 0.238372802734375, + "step": 286 + }, + { + "epoch": 0.019399756658104637, + "grad_norm": 6.590813159942627, + "learning_rate": 1.67578125e-05, + "loss": 0.06768798828125, + "step": 287 + }, + { + "epoch": 0.0194673516290388, + "grad_norm": 8.019734382629395, + "learning_rate": 1.681640625e-05, + "loss": 0.2198944091796875, + "step": 288 + }, + { + "epoch": 0.01953494659997296, + "grad_norm": 8.043902397155762, + "learning_rate": 1.6875e-05, + "loss": 0.152496337890625, + "step": 289 + }, + { + "epoch": 0.019602541570907125, + "grad_norm": 3.1929779052734375, + "learning_rate": 1.693359375e-05, + "loss": 0.1244659423828125, + "step": 290 + }, + { + "epoch": 0.01967013654184129, + "grad_norm": 6.359134674072266, + "learning_rate": 1.69921875e-05, + "loss": 0.1319580078125, + "step": 291 + }, + { + "epoch": 0.01973773151277545, + "grad_norm": 5.0804829597473145, + "learning_rate": 1.705078125e-05, + "loss": 0.0881195068359375, + "step": 292 + }, + { + "epoch": 0.019805326483709612, + "grad_norm": 14.277009963989258, + "learning_rate": 1.7109375e-05, + "loss": 0.277313232421875, + "step": 293 + }, + { + "epoch": 0.019872921454643776, + "grad_norm": 3.023947238922119, + "learning_rate": 1.7167968750000002e-05, + "loss": 0.1365966796875, + "step": 294 + }, + { + "epoch": 0.019940516425577936, + "grad_norm": 7.106813430786133, + "learning_rate": 1.72265625e-05, + "loss": 0.185791015625, + "step": 295 + }, + { + "epoch": 0.0200081113965121, + "grad_norm": 2.1026418209075928, + "learning_rate": 1.728515625e-05, + "loss": 0.1824951171875, + "step": 296 + }, + { + "epoch": 0.020075706367446263, + "grad_norm": 14.690279006958008, + "learning_rate": 1.734375e-05, + "loss": 0.298095703125, + "step": 297 + }, + { + "epoch": 0.020143301338380423, + "grad_norm": 7.025702953338623, + "learning_rate": 1.740234375e-05, + "loss": 0.1397705078125, + "step": 298 + }, + { + "epoch": 0.020210896309314587, + "grad_norm": 3.2902510166168213, + "learning_rate": 1.74609375e-05, + "loss": 0.081146240234375, + "step": 299 + }, + { + "epoch": 0.02027849128024875, + "grad_norm": 22.511598587036133, + "learning_rate": 1.751953125e-05, + "loss": 0.32512664794921875, + "step": 300 + }, + { + "epoch": 0.02034608625118291, + "grad_norm": 2.8976125717163086, + "learning_rate": 1.7578125000000002e-05, + "loss": 0.08251953125, + "step": 301 + }, + { + "epoch": 0.020413681222117074, + "grad_norm": 21.201753616333008, + "learning_rate": 1.763671875e-05, + "loss": 0.3419952392578125, + "step": 302 + }, + { + "epoch": 0.020481276193051238, + "grad_norm": 1.3936522006988525, + "learning_rate": 1.76953125e-05, + "loss": 0.06640625, + "step": 303 + }, + { + "epoch": 0.020548871163985398, + "grad_norm": 10.699604988098145, + "learning_rate": 1.775390625e-05, + "loss": 0.1857452392578125, + "step": 304 + }, + { + "epoch": 0.02061646613491956, + "grad_norm": 5.8376569747924805, + "learning_rate": 1.78125e-05, + "loss": 0.28448486328125, + "step": 305 + }, + { + "epoch": 0.020684061105853725, + "grad_norm": 4.6396050453186035, + "learning_rate": 1.787109375e-05, + "loss": 0.231231689453125, + "step": 306 + }, + { + "epoch": 0.020751656076787885, + "grad_norm": 9.27744197845459, + "learning_rate": 1.79296875e-05, + "loss": 0.1875762939453125, + "step": 307 + }, + { + "epoch": 0.02081925104772205, + "grad_norm": 2.6783788204193115, + "learning_rate": 1.798828125e-05, + "loss": 0.13216400146484375, + "step": 308 + }, + { + "epoch": 0.020886846018656213, + "grad_norm": 9.891481399536133, + "learning_rate": 1.8046875e-05, + "loss": 0.248748779296875, + "step": 309 + }, + { + "epoch": 0.020954440989590376, + "grad_norm": 21.662004470825195, + "learning_rate": 1.810546875e-05, + "loss": 0.31037139892578125, + "step": 310 + }, + { + "epoch": 0.021022035960524536, + "grad_norm": 2.488569736480713, + "learning_rate": 1.8164062500000002e-05, + "loss": 0.08099365234375, + "step": 311 + }, + { + "epoch": 0.0210896309314587, + "grad_norm": 7.970717906951904, + "learning_rate": 1.822265625e-05, + "loss": 0.1691131591796875, + "step": 312 + }, + { + "epoch": 0.021157225902392864, + "grad_norm": 12.900465965270996, + "learning_rate": 1.828125e-05, + "loss": 0.2022857666015625, + "step": 313 + }, + { + "epoch": 0.021224820873327024, + "grad_norm": 11.320375442504883, + "learning_rate": 1.833984375e-05, + "loss": 0.360870361328125, + "step": 314 + }, + { + "epoch": 0.021292415844261187, + "grad_norm": 9.066874504089355, + "learning_rate": 1.83984375e-05, + "loss": 0.183441162109375, + "step": 315 + }, + { + "epoch": 0.02136001081519535, + "grad_norm": 5.733831882476807, + "learning_rate": 1.845703125e-05, + "loss": 0.198394775390625, + "step": 316 + }, + { + "epoch": 0.02142760578612951, + "grad_norm": 0.7661671042442322, + "learning_rate": 1.8515625e-05, + "loss": 0.051441192626953125, + "step": 317 + }, + { + "epoch": 0.021495200757063675, + "grad_norm": 5.705933570861816, + "learning_rate": 1.8574218750000002e-05, + "loss": 0.216217041015625, + "step": 318 + }, + { + "epoch": 0.02156279572799784, + "grad_norm": 8.623678207397461, + "learning_rate": 1.86328125e-05, + "loss": 0.2735748291015625, + "step": 319 + }, + { + "epoch": 0.021630390698932, + "grad_norm": 8.020511627197266, + "learning_rate": 1.869140625e-05, + "loss": 0.188629150390625, + "step": 320 + }, + { + "epoch": 0.021697985669866162, + "grad_norm": 4.364988803863525, + "learning_rate": 1.8750000000000002e-05, + "loss": 0.0779266357421875, + "step": 321 + }, + { + "epoch": 0.021765580640800326, + "grad_norm": 17.29228973388672, + "learning_rate": 1.880859375e-05, + "loss": 0.220550537109375, + "step": 322 + }, + { + "epoch": 0.021833175611734486, + "grad_norm": 11.285736083984375, + "learning_rate": 1.88671875e-05, + "loss": 0.1142120361328125, + "step": 323 + }, + { + "epoch": 0.02190077058266865, + "grad_norm": 14.848213195800781, + "learning_rate": 1.892578125e-05, + "loss": 0.25421142578125, + "step": 324 + }, + { + "epoch": 0.021968365553602813, + "grad_norm": 2.3136022090911865, + "learning_rate": 1.8984375e-05, + "loss": 0.1145172119140625, + "step": 325 + }, + { + "epoch": 0.022035960524536973, + "grad_norm": 11.078003883361816, + "learning_rate": 1.904296875e-05, + "loss": 0.252227783203125, + "step": 326 + }, + { + "epoch": 0.022103555495471137, + "grad_norm": 7.930416107177734, + "learning_rate": 1.91015625e-05, + "loss": 0.134552001953125, + "step": 327 + }, + { + "epoch": 0.0221711504664053, + "grad_norm": 5.706264972686768, + "learning_rate": 1.9160156250000002e-05, + "loss": 0.11920928955078125, + "step": 328 + }, + { + "epoch": 0.02223874543733946, + "grad_norm": 9.229329109191895, + "learning_rate": 1.921875e-05, + "loss": 0.2474365234375, + "step": 329 + }, + { + "epoch": 0.022306340408273624, + "grad_norm": 5.379703521728516, + "learning_rate": 1.927734375e-05, + "loss": 0.203155517578125, + "step": 330 + }, + { + "epoch": 0.022373935379207788, + "grad_norm": 8.083246231079102, + "learning_rate": 1.93359375e-05, + "loss": 0.29931640625, + "step": 331 + }, + { + "epoch": 0.022441530350141948, + "grad_norm": 6.9901347160339355, + "learning_rate": 1.939453125e-05, + "loss": 0.130767822265625, + "step": 332 + }, + { + "epoch": 0.02250912532107611, + "grad_norm": 2.3644144535064697, + "learning_rate": 1.9453125e-05, + "loss": 0.0998382568359375, + "step": 333 + }, + { + "epoch": 0.022576720292010275, + "grad_norm": 9.978816032409668, + "learning_rate": 1.951171875e-05, + "loss": 0.1741485595703125, + "step": 334 + }, + { + "epoch": 0.022644315262944435, + "grad_norm": 8.140107154846191, + "learning_rate": 1.95703125e-05, + "loss": 0.1309967041015625, + "step": 335 + }, + { + "epoch": 0.0227119102338786, + "grad_norm": 20.078338623046875, + "learning_rate": 1.962890625e-05, + "loss": 0.4046630859375, + "step": 336 + }, + { + "epoch": 0.022779505204812762, + "grad_norm": 2.16678524017334, + "learning_rate": 1.96875e-05, + "loss": 0.18419647216796875, + "step": 337 + }, + { + "epoch": 0.022847100175746926, + "grad_norm": 6.2441864013671875, + "learning_rate": 1.9746093750000002e-05, + "loss": 0.1351776123046875, + "step": 338 + }, + { + "epoch": 0.022914695146681086, + "grad_norm": 9.61379623413086, + "learning_rate": 1.98046875e-05, + "loss": 0.1864013671875, + "step": 339 + }, + { + "epoch": 0.02298229011761525, + "grad_norm": 1.7076141834259033, + "learning_rate": 1.986328125e-05, + "loss": 0.06641387939453125, + "step": 340 + }, + { + "epoch": 0.023049885088549413, + "grad_norm": 5.2171101570129395, + "learning_rate": 1.9921875e-05, + "loss": 0.0931549072265625, + "step": 341 + }, + { + "epoch": 0.023117480059483574, + "grad_norm": 8.610881805419922, + "learning_rate": 1.998046875e-05, + "loss": 0.0764312744140625, + "step": 342 + }, + { + "epoch": 0.023185075030417737, + "grad_norm": 10.095645904541016, + "learning_rate": 2.00390625e-05, + "loss": 0.123260498046875, + "step": 343 + }, + { + "epoch": 0.0232526700013519, + "grad_norm": 11.017220497131348, + "learning_rate": 2.009765625e-05, + "loss": 0.14111328125, + "step": 344 + }, + { + "epoch": 0.02332026497228606, + "grad_norm": 8.017125129699707, + "learning_rate": 2.0156250000000002e-05, + "loss": 0.26336669921875, + "step": 345 + }, + { + "epoch": 0.023387859943220225, + "grad_norm": 4.845516204833984, + "learning_rate": 2.021484375e-05, + "loss": 0.1103363037109375, + "step": 346 + }, + { + "epoch": 0.023455454914154388, + "grad_norm": 1.648187279701233, + "learning_rate": 2.02734375e-05, + "loss": 0.079132080078125, + "step": 347 + }, + { + "epoch": 0.02352304988508855, + "grad_norm": 0.8758795857429504, + "learning_rate": 2.033203125e-05, + "loss": 0.047237396240234375, + "step": 348 + }, + { + "epoch": 0.023590644856022712, + "grad_norm": 8.367427825927734, + "learning_rate": 2.0390625e-05, + "loss": 0.18487548828125, + "step": 349 + }, + { + "epoch": 0.023658239826956876, + "grad_norm": 11.413704872131348, + "learning_rate": 2.044921875e-05, + "loss": 0.299468994140625, + "step": 350 + }, + { + "epoch": 0.023725834797891036, + "grad_norm": 1.1486093997955322, + "learning_rate": 2.05078125e-05, + "loss": 0.1114501953125, + "step": 351 + }, + { + "epoch": 0.0237934297688252, + "grad_norm": 8.391826629638672, + "learning_rate": 2.056640625e-05, + "loss": 0.24261474609375, + "step": 352 + }, + { + "epoch": 0.023861024739759363, + "grad_norm": 4.6582136154174805, + "learning_rate": 2.0625e-05, + "loss": 0.183563232421875, + "step": 353 + }, + { + "epoch": 0.023928619710693523, + "grad_norm": 3.008481025695801, + "learning_rate": 2.068359375e-05, + "loss": 0.2455596923828125, + "step": 354 + }, + { + "epoch": 0.023996214681627687, + "grad_norm": 3.93223237991333, + "learning_rate": 2.0742187500000002e-05, + "loss": 0.1039886474609375, + "step": 355 + }, + { + "epoch": 0.02406380965256185, + "grad_norm": 8.160524368286133, + "learning_rate": 2.080078125e-05, + "loss": 0.2410736083984375, + "step": 356 + }, + { + "epoch": 0.02413140462349601, + "grad_norm": 5.512832164764404, + "learning_rate": 2.0859375e-05, + "loss": 0.196319580078125, + "step": 357 + }, + { + "epoch": 0.024198999594430174, + "grad_norm": 0.9439102411270142, + "learning_rate": 2.091796875e-05, + "loss": 0.080291748046875, + "step": 358 + }, + { + "epoch": 0.024266594565364338, + "grad_norm": 6.158370494842529, + "learning_rate": 2.09765625e-05, + "loss": 0.189453125, + "step": 359 + }, + { + "epoch": 0.024334189536298498, + "grad_norm": 10.541096687316895, + "learning_rate": 2.103515625e-05, + "loss": 0.349517822265625, + "step": 360 + }, + { + "epoch": 0.02440178450723266, + "grad_norm": 1.570146918296814, + "learning_rate": 2.109375e-05, + "loss": 0.0487823486328125, + "step": 361 + }, + { + "epoch": 0.024469379478166825, + "grad_norm": 0.6929265260696411, + "learning_rate": 2.1152343750000002e-05, + "loss": 0.0666656494140625, + "step": 362 + }, + { + "epoch": 0.024536974449100985, + "grad_norm": 34.095401763916016, + "learning_rate": 2.12109375e-05, + "loss": 0.4329833984375, + "step": 363 + }, + { + "epoch": 0.02460456942003515, + "grad_norm": 4.378757476806641, + "learning_rate": 2.126953125e-05, + "loss": 0.1498088836669922, + "step": 364 + }, + { + "epoch": 0.024672164390969312, + "grad_norm": 9.516453742980957, + "learning_rate": 2.1328125000000002e-05, + "loss": 0.259521484375, + "step": 365 + }, + { + "epoch": 0.024739759361903476, + "grad_norm": 8.274052619934082, + "learning_rate": 2.138671875e-05, + "loss": 0.15692138671875, + "step": 366 + }, + { + "epoch": 0.024807354332837636, + "grad_norm": 3.106950521469116, + "learning_rate": 2.14453125e-05, + "loss": 0.072784423828125, + "step": 367 + }, + { + "epoch": 0.0248749493037718, + "grad_norm": 7.230459213256836, + "learning_rate": 2.150390625e-05, + "loss": 0.23923492431640625, + "step": 368 + }, + { + "epoch": 0.024942544274705963, + "grad_norm": 6.129903316497803, + "learning_rate": 2.15625e-05, + "loss": 0.243896484375, + "step": 369 + }, + { + "epoch": 0.025010139245640124, + "grad_norm": 4.025104999542236, + "learning_rate": 2.162109375e-05, + "loss": 0.287841796875, + "step": 370 + }, + { + "epoch": 0.025077734216574287, + "grad_norm": 6.756148338317871, + "learning_rate": 2.16796875e-05, + "loss": 0.231231689453125, + "step": 371 + }, + { + "epoch": 0.02514532918750845, + "grad_norm": 2.333451747894287, + "learning_rate": 2.1738281250000002e-05, + "loss": 0.148590087890625, + "step": 372 + }, + { + "epoch": 0.02521292415844261, + "grad_norm": 20.399250030517578, + "learning_rate": 2.1796875e-05, + "loss": 0.343505859375, + "step": 373 + }, + { + "epoch": 0.025280519129376774, + "grad_norm": 14.078908920288086, + "learning_rate": 2.185546875e-05, + "loss": 0.2408599853515625, + "step": 374 + }, + { + "epoch": 0.025348114100310938, + "grad_norm": 6.403088569641113, + "learning_rate": 2.19140625e-05, + "loss": 0.2024383544921875, + "step": 375 + }, + { + "epoch": 0.025415709071245098, + "grad_norm": 3.4311001300811768, + "learning_rate": 2.197265625e-05, + "loss": 0.0633087158203125, + "step": 376 + }, + { + "epoch": 0.025483304042179262, + "grad_norm": 15.011462211608887, + "learning_rate": 2.203125e-05, + "loss": 0.331207275390625, + "step": 377 + }, + { + "epoch": 0.025550899013113425, + "grad_norm": 12.760905265808105, + "learning_rate": 2.208984375e-05, + "loss": 0.1469268798828125, + "step": 378 + }, + { + "epoch": 0.025618493984047586, + "grad_norm": 5.718034267425537, + "learning_rate": 2.2148437500000002e-05, + "loss": 0.1087493896484375, + "step": 379 + }, + { + "epoch": 0.02568608895498175, + "grad_norm": 3.693514585494995, + "learning_rate": 2.220703125e-05, + "loss": 0.2860107421875, + "step": 380 + }, + { + "epoch": 0.025753683925915913, + "grad_norm": 3.5825624465942383, + "learning_rate": 2.2265625e-05, + "loss": 0.29046630859375, + "step": 381 + }, + { + "epoch": 0.025821278896850073, + "grad_norm": 3.7215635776519775, + "learning_rate": 2.2324218750000002e-05, + "loss": 0.1082000732421875, + "step": 382 + }, + { + "epoch": 0.025888873867784237, + "grad_norm": 2.0840439796447754, + "learning_rate": 2.23828125e-05, + "loss": 0.0531463623046875, + "step": 383 + }, + { + "epoch": 0.0259564688387184, + "grad_norm": 17.026700973510742, + "learning_rate": 2.244140625e-05, + "loss": 0.24310302734375, + "step": 384 + }, + { + "epoch": 0.02602406380965256, + "grad_norm": 27.07091522216797, + "learning_rate": 2.25e-05, + "loss": 0.32452392578125, + "step": 385 + }, + { + "epoch": 0.026091658780586724, + "grad_norm": 10.071859359741211, + "learning_rate": 2.255859375e-05, + "loss": 0.250152587890625, + "step": 386 + }, + { + "epoch": 0.026159253751520888, + "grad_norm": 8.271512031555176, + "learning_rate": 2.26171875e-05, + "loss": 0.17608642578125, + "step": 387 + }, + { + "epoch": 0.026226848722455048, + "grad_norm": 1.0737972259521484, + "learning_rate": 2.267578125e-05, + "loss": 0.047607421875, + "step": 388 + }, + { + "epoch": 0.02629444369338921, + "grad_norm": 0.6329683661460876, + "learning_rate": 2.2734375000000002e-05, + "loss": 0.03125, + "step": 389 + }, + { + "epoch": 0.026362038664323375, + "grad_norm": 15.753418922424316, + "learning_rate": 2.279296875e-05, + "loss": 0.347900390625, + "step": 390 + }, + { + "epoch": 0.02642963363525754, + "grad_norm": 9.622559547424316, + "learning_rate": 2.28515625e-05, + "loss": 0.24176025390625, + "step": 391 + }, + { + "epoch": 0.0264972286061917, + "grad_norm": 2.510590076446533, + "learning_rate": 2.291015625e-05, + "loss": 0.05461883544921875, + "step": 392 + }, + { + "epoch": 0.026564823577125862, + "grad_norm": 4.917771339416504, + "learning_rate": 2.296875e-05, + "loss": 0.13381195068359375, + "step": 393 + }, + { + "epoch": 0.026632418548060026, + "grad_norm": 5.751366138458252, + "learning_rate": 2.302734375e-05, + "loss": 0.362060546875, + "step": 394 + }, + { + "epoch": 0.026700013518994186, + "grad_norm": 5.922732830047607, + "learning_rate": 2.30859375e-05, + "loss": 0.1941680908203125, + "step": 395 + }, + { + "epoch": 0.02676760848992835, + "grad_norm": 6.225771903991699, + "learning_rate": 2.3144531250000002e-05, + "loss": 0.138458251953125, + "step": 396 + }, + { + "epoch": 0.026835203460862513, + "grad_norm": 4.1280903816223145, + "learning_rate": 2.3203125e-05, + "loss": 0.07604217529296875, + "step": 397 + }, + { + "epoch": 0.026902798431796673, + "grad_norm": 5.0215253829956055, + "learning_rate": 2.326171875e-05, + "loss": 0.3011474609375, + "step": 398 + }, + { + "epoch": 0.026970393402730837, + "grad_norm": 1.119856595993042, + "learning_rate": 2.3320312500000002e-05, + "loss": 0.10260009765625, + "step": 399 + }, + { + "epoch": 0.027037988373665, + "grad_norm": 2.252176284790039, + "learning_rate": 2.337890625e-05, + "loss": 0.15374755859375, + "step": 400 + }, + { + "epoch": 0.02710558334459916, + "grad_norm": 2.1092448234558105, + "learning_rate": 2.34375e-05, + "loss": 0.11439895629882812, + "step": 401 + }, + { + "epoch": 0.027173178315533324, + "grad_norm": 3.9765944480895996, + "learning_rate": 2.349609375e-05, + "loss": 0.102264404296875, + "step": 402 + }, + { + "epoch": 0.027240773286467488, + "grad_norm": 3.2441365718841553, + "learning_rate": 2.35546875e-05, + "loss": 0.2510223388671875, + "step": 403 + }, + { + "epoch": 0.027308368257401648, + "grad_norm": 7.121006965637207, + "learning_rate": 2.361328125e-05, + "loss": 0.190399169921875, + "step": 404 + }, + { + "epoch": 0.027375963228335812, + "grad_norm": 7.771458625793457, + "learning_rate": 2.3671875e-05, + "loss": 0.3065185546875, + "step": 405 + }, + { + "epoch": 0.027443558199269975, + "grad_norm": 2.170820713043213, + "learning_rate": 2.3730468750000002e-05, + "loss": 0.2342529296875, + "step": 406 + }, + { + "epoch": 0.027511153170204136, + "grad_norm": 0.5624839067459106, + "learning_rate": 2.37890625e-05, + "loss": 0.03150177001953125, + "step": 407 + }, + { + "epoch": 0.0275787481411383, + "grad_norm": 6.942445755004883, + "learning_rate": 2.384765625e-05, + "loss": 0.156890869140625, + "step": 408 + }, + { + "epoch": 0.027646343112072463, + "grad_norm": 5.809784412384033, + "learning_rate": 2.3906250000000002e-05, + "loss": 0.206939697265625, + "step": 409 + }, + { + "epoch": 0.027713938083006623, + "grad_norm": 0.7757819294929504, + "learning_rate": 2.396484375e-05, + "loss": 0.04688262939453125, + "step": 410 + }, + { + "epoch": 0.027781533053940786, + "grad_norm": 1.4328538179397583, + "learning_rate": 2.40234375e-05, + "loss": 0.13787841796875, + "step": 411 + }, + { + "epoch": 0.02784912802487495, + "grad_norm": 2.5853662490844727, + "learning_rate": 2.408203125e-05, + "loss": 0.19533157348632812, + "step": 412 + }, + { + "epoch": 0.02791672299580911, + "grad_norm": 1.5134639739990234, + "learning_rate": 2.4140625e-05, + "loss": 0.0529022216796875, + "step": 413 + }, + { + "epoch": 0.027984317966743274, + "grad_norm": 2.6085569858551025, + "learning_rate": 2.419921875e-05, + "loss": 0.08013153076171875, + "step": 414 + }, + { + "epoch": 0.028051912937677437, + "grad_norm": 2.6905324459075928, + "learning_rate": 2.42578125e-05, + "loss": 0.1066436767578125, + "step": 415 + }, + { + "epoch": 0.028119507908611598, + "grad_norm": 1.9389193058013916, + "learning_rate": 2.4316406250000002e-05, + "loss": 0.16960906982421875, + "step": 416 + }, + { + "epoch": 0.02818710287954576, + "grad_norm": 1.633745551109314, + "learning_rate": 2.4375e-05, + "loss": 0.10498046875, + "step": 417 + }, + { + "epoch": 0.028254697850479925, + "grad_norm": 4.694248676300049, + "learning_rate": 2.443359375e-05, + "loss": 0.29815673828125, + "step": 418 + }, + { + "epoch": 0.02832229282141409, + "grad_norm": 3.166347026824951, + "learning_rate": 2.44921875e-05, + "loss": 0.1195831298828125, + "step": 419 + }, + { + "epoch": 0.02838988779234825, + "grad_norm": 6.209982872009277, + "learning_rate": 2.455078125e-05, + "loss": 0.199005126953125, + "step": 420 + }, + { + "epoch": 0.028457482763282412, + "grad_norm": 1.4571806192398071, + "learning_rate": 2.4609375e-05, + "loss": 0.20528411865234375, + "step": 421 + }, + { + "epoch": 0.028525077734216576, + "grad_norm": 5.560473442077637, + "learning_rate": 2.466796875e-05, + "loss": 0.2284088134765625, + "step": 422 + }, + { + "epoch": 0.028592672705150736, + "grad_norm": 6.700767517089844, + "learning_rate": 2.4726562500000002e-05, + "loss": 0.294677734375, + "step": 423 + }, + { + "epoch": 0.0286602676760849, + "grad_norm": 2.20605206489563, + "learning_rate": 2.478515625e-05, + "loss": 0.087646484375, + "step": 424 + }, + { + "epoch": 0.028727862647019063, + "grad_norm": 1.2782211303710938, + "learning_rate": 2.484375e-05, + "loss": 0.06169891357421875, + "step": 425 + }, + { + "epoch": 0.028795457617953223, + "grad_norm": 2.9984960556030273, + "learning_rate": 2.4902343750000002e-05, + "loss": 0.18921661376953125, + "step": 426 + }, + { + "epoch": 0.028863052588887387, + "grad_norm": 10.975702285766602, + "learning_rate": 2.49609375e-05, + "loss": 0.3327178955078125, + "step": 427 + }, + { + "epoch": 0.02893064755982155, + "grad_norm": 2.317087173461914, + "learning_rate": 2.501953125e-05, + "loss": 0.03583526611328125, + "step": 428 + }, + { + "epoch": 0.02899824253075571, + "grad_norm": 1.8357352018356323, + "learning_rate": 2.5078125e-05, + "loss": 0.07122039794921875, + "step": 429 + }, + { + "epoch": 0.029065837501689874, + "grad_norm": 27.51279067993164, + "learning_rate": 2.513671875e-05, + "loss": 0.378662109375, + "step": 430 + }, + { + "epoch": 0.029133432472624038, + "grad_norm": 24.70044708251953, + "learning_rate": 2.51953125e-05, + "loss": 0.320648193359375, + "step": 431 + }, + { + "epoch": 0.029201027443558198, + "grad_norm": 7.272634029388428, + "learning_rate": 2.525390625e-05, + "loss": 0.0934906005859375, + "step": 432 + }, + { + "epoch": 0.02926862241449236, + "grad_norm": 15.722953796386719, + "learning_rate": 2.5312500000000002e-05, + "loss": 0.22308349609375, + "step": 433 + }, + { + "epoch": 0.029336217385426525, + "grad_norm": 12.332650184631348, + "learning_rate": 2.537109375e-05, + "loss": 0.3515625, + "step": 434 + }, + { + "epoch": 0.029403812356360685, + "grad_norm": 3.322026491165161, + "learning_rate": 2.54296875e-05, + "loss": 0.1787872314453125, + "step": 435 + }, + { + "epoch": 0.02947140732729485, + "grad_norm": 27.003564834594727, + "learning_rate": 2.548828125e-05, + "loss": 0.50177001953125, + "step": 436 + }, + { + "epoch": 0.029539002298229013, + "grad_norm": 10.251565933227539, + "learning_rate": 2.5546875e-05, + "loss": 0.183624267578125, + "step": 437 + }, + { + "epoch": 0.029606597269163173, + "grad_norm": 15.310209274291992, + "learning_rate": 2.560546875e-05, + "loss": 0.35498046875, + "step": 438 + }, + { + "epoch": 0.029674192240097336, + "grad_norm": 16.123598098754883, + "learning_rate": 2.56640625e-05, + "loss": 0.352691650390625, + "step": 439 + }, + { + "epoch": 0.0297417872110315, + "grad_norm": 6.12283992767334, + "learning_rate": 2.5722656250000002e-05, + "loss": 0.172088623046875, + "step": 440 + }, + { + "epoch": 0.02980938218196566, + "grad_norm": 4.673194408416748, + "learning_rate": 2.578125e-05, + "loss": 0.228790283203125, + "step": 441 + }, + { + "epoch": 0.029876977152899824, + "grad_norm": 4.531480312347412, + "learning_rate": 2.583984375e-05, + "loss": 0.098968505859375, + "step": 442 + }, + { + "epoch": 0.029944572123833987, + "grad_norm": 5.083461284637451, + "learning_rate": 2.5898437500000002e-05, + "loss": 0.187744140625, + "step": 443 + }, + { + "epoch": 0.03001216709476815, + "grad_norm": 2.66290545463562, + "learning_rate": 2.595703125e-05, + "loss": 0.16241455078125, + "step": 444 + }, + { + "epoch": 0.03007976206570231, + "grad_norm": 1.255873441696167, + "learning_rate": 2.6015625e-05, + "loss": 0.0851898193359375, + "step": 445 + }, + { + "epoch": 0.030147357036636475, + "grad_norm": 4.275351047515869, + "learning_rate": 2.607421875e-05, + "loss": 0.2740478515625, + "step": 446 + }, + { + "epoch": 0.03021495200757064, + "grad_norm": 1.6822147369384766, + "learning_rate": 2.61328125e-05, + "loss": 0.156768798828125, + "step": 447 + }, + { + "epoch": 0.0302825469785048, + "grad_norm": 3.388277053833008, + "learning_rate": 2.619140625e-05, + "loss": 0.302978515625, + "step": 448 + }, + { + "epoch": 0.030350141949438962, + "grad_norm": 1.4071094989776611, + "learning_rate": 2.625e-05, + "loss": 0.09546661376953125, + "step": 449 + }, + { + "epoch": 0.030417736920373126, + "grad_norm": 1.9848068952560425, + "learning_rate": 2.6308593750000002e-05, + "loss": 0.2664794921875, + "step": 450 + }, + { + "epoch": 0.030485331891307286, + "grad_norm": 3.504163980484009, + "learning_rate": 2.63671875e-05, + "loss": 0.2108154296875, + "step": 451 + }, + { + "epoch": 0.03055292686224145, + "grad_norm": 0.8638094663619995, + "learning_rate": 2.642578125e-05, + "loss": 0.09218597412109375, + "step": 452 + }, + { + "epoch": 0.030620521833175613, + "grad_norm": 2.193190336227417, + "learning_rate": 2.6484375000000002e-05, + "loss": 0.18084716796875, + "step": 453 + }, + { + "epoch": 0.030688116804109773, + "grad_norm": 4.088580131530762, + "learning_rate": 2.654296875e-05, + "loss": 0.250579833984375, + "step": 454 + }, + { + "epoch": 0.030755711775043937, + "grad_norm": 2.3764312267303467, + "learning_rate": 2.66015625e-05, + "loss": 0.223236083984375, + "step": 455 + }, + { + "epoch": 0.0308233067459781, + "grad_norm": 1.3343976736068726, + "learning_rate": 2.666015625e-05, + "loss": 0.06396484375, + "step": 456 + }, + { + "epoch": 0.03089090171691226, + "grad_norm": 1.8936148881912231, + "learning_rate": 2.6718750000000002e-05, + "loss": 0.11426544189453125, + "step": 457 + }, + { + "epoch": 0.030958496687846424, + "grad_norm": 1.311998963356018, + "learning_rate": 2.677734375e-05, + "loss": 0.0506134033203125, + "step": 458 + }, + { + "epoch": 0.031026091658780588, + "grad_norm": 0.9546435475349426, + "learning_rate": 2.68359375e-05, + "loss": 0.06926727294921875, + "step": 459 + }, + { + "epoch": 0.031093686629714748, + "grad_norm": 3.561858892440796, + "learning_rate": 2.6894531250000002e-05, + "loss": 0.21929931640625, + "step": 460 + }, + { + "epoch": 0.03116128160064891, + "grad_norm": 4.304913520812988, + "learning_rate": 2.6953125e-05, + "loss": 0.23797607421875, + "step": 461 + }, + { + "epoch": 0.031228876571583075, + "grad_norm": 2.0724387168884277, + "learning_rate": 2.701171875e-05, + "loss": 0.12384033203125, + "step": 462 + }, + { + "epoch": 0.03129647154251724, + "grad_norm": 7.133396148681641, + "learning_rate": 2.70703125e-05, + "loss": 0.318115234375, + "step": 463 + }, + { + "epoch": 0.0313640665134514, + "grad_norm": 5.988895893096924, + "learning_rate": 2.712890625e-05, + "loss": 0.3399658203125, + "step": 464 + }, + { + "epoch": 0.03143166148438556, + "grad_norm": 5.082204818725586, + "learning_rate": 2.71875e-05, + "loss": 0.40509033203125, + "step": 465 + }, + { + "epoch": 0.03149925645531972, + "grad_norm": 1.7945188283920288, + "learning_rate": 2.724609375e-05, + "loss": 0.11090087890625, + "step": 466 + }, + { + "epoch": 0.031566851426253886, + "grad_norm": 7.486267566680908, + "learning_rate": 2.7304687500000002e-05, + "loss": 0.21630859375, + "step": 467 + }, + { + "epoch": 0.03163444639718805, + "grad_norm": 6.199929714202881, + "learning_rate": 2.736328125e-05, + "loss": 0.170196533203125, + "step": 468 + }, + { + "epoch": 0.031702041368122214, + "grad_norm": 2.7673981189727783, + "learning_rate": 2.7421875e-05, + "loss": 0.0944976806640625, + "step": 469 + }, + { + "epoch": 0.03176963633905638, + "grad_norm": 4.669311046600342, + "learning_rate": 2.7480468750000002e-05, + "loss": 0.1676177978515625, + "step": 470 + }, + { + "epoch": 0.031837231309990534, + "grad_norm": 5.885353088378906, + "learning_rate": 2.75390625e-05, + "loss": 0.211578369140625, + "step": 471 + }, + { + "epoch": 0.0319048262809247, + "grad_norm": 2.3928325176239014, + "learning_rate": 2.759765625e-05, + "loss": 0.1131744384765625, + "step": 472 + }, + { + "epoch": 0.03197242125185886, + "grad_norm": 3.843644142150879, + "learning_rate": 2.765625e-05, + "loss": 0.172210693359375, + "step": 473 + }, + { + "epoch": 0.032040016222793025, + "grad_norm": 1.9580055475234985, + "learning_rate": 2.7714843750000002e-05, + "loss": 0.1076507568359375, + "step": 474 + }, + { + "epoch": 0.03210761119372719, + "grad_norm": 4.578806400299072, + "learning_rate": 2.77734375e-05, + "loss": 0.1914825439453125, + "step": 475 + }, + { + "epoch": 0.03217520616466135, + "grad_norm": 1.1899564266204834, + "learning_rate": 2.783203125e-05, + "loss": 0.20062255859375, + "step": 476 + }, + { + "epoch": 0.03224280113559551, + "grad_norm": 4.048281669616699, + "learning_rate": 2.7890625000000002e-05, + "loss": 0.225555419921875, + "step": 477 + }, + { + "epoch": 0.03231039610652967, + "grad_norm": 5.124964237213135, + "learning_rate": 2.794921875e-05, + "loss": 0.20465087890625, + "step": 478 + }, + { + "epoch": 0.032377991077463836, + "grad_norm": 4.241960525512695, + "learning_rate": 2.80078125e-05, + "loss": 0.166839599609375, + "step": 479 + }, + { + "epoch": 0.032445586048398, + "grad_norm": 2.5796732902526855, + "learning_rate": 2.806640625e-05, + "loss": 0.140106201171875, + "step": 480 + }, + { + "epoch": 0.03251318101933216, + "grad_norm": 2.3280487060546875, + "learning_rate": 2.8125e-05, + "loss": 0.2071380615234375, + "step": 481 + }, + { + "epoch": 0.03258077599026633, + "grad_norm": 1.4279780387878418, + "learning_rate": 2.818359375e-05, + "loss": 0.051242828369140625, + "step": 482 + }, + { + "epoch": 0.03264837096120048, + "grad_norm": 6.426149368286133, + "learning_rate": 2.82421875e-05, + "loss": 0.26953125, + "step": 483 + }, + { + "epoch": 0.03271596593213465, + "grad_norm": 4.163753032684326, + "learning_rate": 2.8300781250000002e-05, + "loss": 0.218841552734375, + "step": 484 + }, + { + "epoch": 0.03278356090306881, + "grad_norm": 2.0722248554229736, + "learning_rate": 2.8359375e-05, + "loss": 0.244354248046875, + "step": 485 + }, + { + "epoch": 0.032851155874002974, + "grad_norm": 0.7884582877159119, + "learning_rate": 2.841796875e-05, + "loss": 0.07666015625, + "step": 486 + }, + { + "epoch": 0.03291875084493714, + "grad_norm": 2.9058279991149902, + "learning_rate": 2.8476562500000002e-05, + "loss": 0.1695098876953125, + "step": 487 + }, + { + "epoch": 0.0329863458158713, + "grad_norm": 2.1125428676605225, + "learning_rate": 2.853515625e-05, + "loss": 0.30303955078125, + "step": 488 + }, + { + "epoch": 0.033053940786805465, + "grad_norm": 5.308195114135742, + "learning_rate": 2.859375e-05, + "loss": 0.16912841796875, + "step": 489 + }, + { + "epoch": 0.03312153575773962, + "grad_norm": 1.978914737701416, + "learning_rate": 2.865234375e-05, + "loss": 0.185638427734375, + "step": 490 + }, + { + "epoch": 0.033189130728673785, + "grad_norm": 5.4174113273620605, + "learning_rate": 2.87109375e-05, + "loss": 0.20507049560546875, + "step": 491 + }, + { + "epoch": 0.03325672569960795, + "grad_norm": 1.4433060884475708, + "learning_rate": 2.876953125e-05, + "loss": 0.10002899169921875, + "step": 492 + }, + { + "epoch": 0.03332432067054211, + "grad_norm": 3.55291485786438, + "learning_rate": 2.8828125e-05, + "loss": 0.1599884033203125, + "step": 493 + }, + { + "epoch": 0.033391915641476276, + "grad_norm": 2.94087290763855, + "learning_rate": 2.8886718750000002e-05, + "loss": 0.09096527099609375, + "step": 494 + }, + { + "epoch": 0.03345951061241044, + "grad_norm": 2.99192476272583, + "learning_rate": 2.89453125e-05, + "loss": 0.18865966796875, + "step": 495 + }, + { + "epoch": 0.033527105583344596, + "grad_norm": 0.8117580413818359, + "learning_rate": 2.900390625e-05, + "loss": 0.1006927490234375, + "step": 496 + }, + { + "epoch": 0.03359470055427876, + "grad_norm": 8.738884925842285, + "learning_rate": 2.90625e-05, + "loss": 0.26808929443359375, + "step": 497 + }, + { + "epoch": 0.033662295525212924, + "grad_norm": 2.511035919189453, + "learning_rate": 2.912109375e-05, + "loss": 0.14691162109375, + "step": 498 + }, + { + "epoch": 0.03372989049614709, + "grad_norm": 3.4968461990356445, + "learning_rate": 2.91796875e-05, + "loss": 0.2660980224609375, + "step": 499 + }, + { + "epoch": 0.03379748546708125, + "grad_norm": 1.2486506700515747, + "learning_rate": 2.923828125e-05, + "loss": 0.0526275634765625, + "step": 500 + }, + { + "epoch": 0.033865080438015414, + "grad_norm": 1.025908350944519, + "learning_rate": 2.9296875000000002e-05, + "loss": 0.0847930908203125, + "step": 501 + }, + { + "epoch": 0.03393267540894957, + "grad_norm": 4.5361738204956055, + "learning_rate": 2.935546875e-05, + "loss": 0.25897216796875, + "step": 502 + }, + { + "epoch": 0.034000270379883735, + "grad_norm": 5.306682109832764, + "learning_rate": 2.94140625e-05, + "loss": 0.09691047668457031, + "step": 503 + }, + { + "epoch": 0.0340678653508179, + "grad_norm": 3.5618929862976074, + "learning_rate": 2.9472656250000002e-05, + "loss": 0.23748779296875, + "step": 504 + }, + { + "epoch": 0.03413546032175206, + "grad_norm": 5.833836078643799, + "learning_rate": 2.953125e-05, + "loss": 0.1499176025390625, + "step": 505 + }, + { + "epoch": 0.034203055292686226, + "grad_norm": 1.7583891153335571, + "learning_rate": 2.958984375e-05, + "loss": 0.18524169921875, + "step": 506 + }, + { + "epoch": 0.03427065026362039, + "grad_norm": 1.5318264961242676, + "learning_rate": 2.96484375e-05, + "loss": 0.0628204345703125, + "step": 507 + }, + { + "epoch": 0.034338245234554546, + "grad_norm": 3.405144691467285, + "learning_rate": 2.970703125e-05, + "loss": 0.16989898681640625, + "step": 508 + }, + { + "epoch": 0.03440584020548871, + "grad_norm": 4.384740829467773, + "learning_rate": 2.9765625e-05, + "loss": 0.1421661376953125, + "step": 509 + }, + { + "epoch": 0.03447343517642287, + "grad_norm": 0.5372325778007507, + "learning_rate": 2.982421875e-05, + "loss": 0.063751220703125, + "step": 510 + }, + { + "epoch": 0.03454103014735704, + "grad_norm": 8.611273765563965, + "learning_rate": 2.9882812500000002e-05, + "loss": 0.2488555908203125, + "step": 511 + }, + { + "epoch": 0.0346086251182912, + "grad_norm": 1.6546189785003662, + "learning_rate": 2.994140625e-05, + "loss": 0.0645751953125, + "step": 512 + }, + { + "epoch": 0.034676220089225364, + "grad_norm": 1.179900050163269, + "learning_rate": 3e-05, + "loss": 0.097137451171875, + "step": 513 + }, + { + "epoch": 0.03474381506015953, + "grad_norm": 1.5044550895690918, + "learning_rate": 2.9999999637103358e-05, + "loss": 0.1161651611328125, + "step": 514 + }, + { + "epoch": 0.034811410031093684, + "grad_norm": 4.979653358459473, + "learning_rate": 2.999999854841345e-05, + "loss": 0.19903564453125, + "step": 515 + }, + { + "epoch": 0.03487900500202785, + "grad_norm": 8.634427070617676, + "learning_rate": 2.9999996733930325e-05, + "loss": 0.26019287109375, + "step": 516 + }, + { + "epoch": 0.03494659997296201, + "grad_norm": 2.5824148654937744, + "learning_rate": 2.999999419365408e-05, + "loss": 0.0971832275390625, + "step": 517 + }, + { + "epoch": 0.035014194943896175, + "grad_norm": 2.5025506019592285, + "learning_rate": 2.999999092758483e-05, + "loss": 0.2554931640625, + "step": 518 + }, + { + "epoch": 0.03508178991483034, + "grad_norm": 3.689680814743042, + "learning_rate": 2.9999986935722734e-05, + "loss": 0.3067626953125, + "step": 519 + }, + { + "epoch": 0.0351493848857645, + "grad_norm": 0.8090178370475769, + "learning_rate": 2.9999982218067983e-05, + "loss": 0.0940093994140625, + "step": 520 + }, + { + "epoch": 0.03521697985669866, + "grad_norm": 1.9078088998794556, + "learning_rate": 2.9999976774620814e-05, + "loss": 0.15077972412109375, + "step": 521 + }, + { + "epoch": 0.03528457482763282, + "grad_norm": 2.2195756435394287, + "learning_rate": 2.999997060538148e-05, + "loss": 0.212646484375, + "step": 522 + }, + { + "epoch": 0.035352169798566986, + "grad_norm": 1.333257794380188, + "learning_rate": 2.999996371035029e-05, + "loss": 0.179443359375, + "step": 523 + }, + { + "epoch": 0.03541976476950115, + "grad_norm": 1.7220290899276733, + "learning_rate": 2.9999956089527564e-05, + "loss": 0.1468353271484375, + "step": 524 + }, + { + "epoch": 0.03548735974043531, + "grad_norm": 1.610285997390747, + "learning_rate": 2.999994774291368e-05, + "loss": 0.0954742431640625, + "step": 525 + }, + { + "epoch": 0.03555495471136948, + "grad_norm": 0.744406521320343, + "learning_rate": 2.999993867050905e-05, + "loss": 0.0748443603515625, + "step": 526 + }, + { + "epoch": 0.035622549682303634, + "grad_norm": 0.4414798617362976, + "learning_rate": 2.9999928872314093e-05, + "loss": 0.04856109619140625, + "step": 527 + }, + { + "epoch": 0.0356901446532378, + "grad_norm": 1.5281988382339478, + "learning_rate": 2.99999183483293e-05, + "loss": 0.1868133544921875, + "step": 528 + }, + { + "epoch": 0.03575773962417196, + "grad_norm": 0.6250056624412537, + "learning_rate": 2.999990709855517e-05, + "loss": 0.09075927734375, + "step": 529 + }, + { + "epoch": 0.035825334595106124, + "grad_norm": 0.9773966670036316, + "learning_rate": 2.9999895122992257e-05, + "loss": 0.1099090576171875, + "step": 530 + }, + { + "epoch": 0.03589292956604029, + "grad_norm": 2.9628450870513916, + "learning_rate": 2.9999882421641124e-05, + "loss": 0.21350860595703125, + "step": 531 + }, + { + "epoch": 0.03596052453697445, + "grad_norm": 1.206635594367981, + "learning_rate": 2.9999868994502403e-05, + "loss": 0.1246490478515625, + "step": 532 + }, + { + "epoch": 0.03602811950790861, + "grad_norm": 2.425391912460327, + "learning_rate": 2.999985484157674e-05, + "loss": 0.203399658203125, + "step": 533 + }, + { + "epoch": 0.03609571447884277, + "grad_norm": 1.711814522743225, + "learning_rate": 2.999983996286481e-05, + "loss": 0.08929443359375, + "step": 534 + }, + { + "epoch": 0.036163309449776936, + "grad_norm": 4.02428674697876, + "learning_rate": 2.9999824358367344e-05, + "loss": 0.291656494140625, + "step": 535 + }, + { + "epoch": 0.0362309044207111, + "grad_norm": 8.798018455505371, + "learning_rate": 2.999980802808509e-05, + "loss": 0.41668701171875, + "step": 536 + }, + { + "epoch": 0.03629849939164526, + "grad_norm": 3.76615309715271, + "learning_rate": 2.9999790972018845e-05, + "loss": 0.184814453125, + "step": 537 + }, + { + "epoch": 0.036366094362579426, + "grad_norm": 3.546931028366089, + "learning_rate": 2.9999773190169423e-05, + "loss": 0.12886810302734375, + "step": 538 + }, + { + "epoch": 0.03643368933351359, + "grad_norm": 4.962716579437256, + "learning_rate": 2.9999754682537697e-05, + "loss": 0.2889404296875, + "step": 539 + }, + { + "epoch": 0.03650128430444775, + "grad_norm": 6.313519477844238, + "learning_rate": 2.9999735449124556e-05, + "loss": 0.1430816650390625, + "step": 540 + }, + { + "epoch": 0.03656887927538191, + "grad_norm": 1.229411005973816, + "learning_rate": 2.999971548993093e-05, + "loss": 0.043918609619140625, + "step": 541 + }, + { + "epoch": 0.036636474246316074, + "grad_norm": 2.674504280090332, + "learning_rate": 2.999969480495779e-05, + "loss": 0.2655029296875, + "step": 542 + }, + { + "epoch": 0.03670406921725024, + "grad_norm": 5.411997318267822, + "learning_rate": 2.999967339420613e-05, + "loss": 0.181243896484375, + "step": 543 + }, + { + "epoch": 0.0367716641881844, + "grad_norm": 3.011751890182495, + "learning_rate": 2.9999651257676987e-05, + "loss": 0.1518402099609375, + "step": 544 + }, + { + "epoch": 0.036839259159118565, + "grad_norm": 3.9834916591644287, + "learning_rate": 2.999962839537144e-05, + "loss": 0.16900634765625, + "step": 545 + }, + { + "epoch": 0.03690685413005272, + "grad_norm": 12.365861892700195, + "learning_rate": 2.9999604807290582e-05, + "loss": 0.35357666015625, + "step": 546 + }, + { + "epoch": 0.036974449100986885, + "grad_norm": 1.8154349327087402, + "learning_rate": 2.999958049343557e-05, + "loss": 0.0653533935546875, + "step": 547 + }, + { + "epoch": 0.03704204407192105, + "grad_norm": 0.9938412308692932, + "learning_rate": 2.999955545380757e-05, + "loss": 0.06389617919921875, + "step": 548 + }, + { + "epoch": 0.03710963904285521, + "grad_norm": 6.264145851135254, + "learning_rate": 2.9999529688407792e-05, + "loss": 0.217132568359375, + "step": 549 + }, + { + "epoch": 0.037177234013789376, + "grad_norm": 3.956162214279175, + "learning_rate": 2.999950319723749e-05, + "loss": 0.220489501953125, + "step": 550 + }, + { + "epoch": 0.03724482898472354, + "grad_norm": 1.0403892993927002, + "learning_rate": 2.9999475980297943e-05, + "loss": 0.176971435546875, + "step": 551 + }, + { + "epoch": 0.037312423955657696, + "grad_norm": 1.463651418685913, + "learning_rate": 2.9999448037590466e-05, + "loss": 0.0677490234375, + "step": 552 + }, + { + "epoch": 0.03738001892659186, + "grad_norm": 2.123817205429077, + "learning_rate": 2.9999419369116414e-05, + "loss": 0.07464599609375, + "step": 553 + }, + { + "epoch": 0.03744761389752602, + "grad_norm": 5.314402103424072, + "learning_rate": 2.999938997487717e-05, + "loss": 0.18651580810546875, + "step": 554 + }, + { + "epoch": 0.03751520886846019, + "grad_norm": 2.477102279663086, + "learning_rate": 2.999935985487416e-05, + "loss": 0.166168212890625, + "step": 555 + }, + { + "epoch": 0.03758280383939435, + "grad_norm": 4.112088680267334, + "learning_rate": 2.999932900910884e-05, + "loss": 0.2740478515625, + "step": 556 + }, + { + "epoch": 0.037650398810328514, + "grad_norm": 2.755594491958618, + "learning_rate": 2.9999297437582704e-05, + "loss": 0.28411865234375, + "step": 557 + }, + { + "epoch": 0.03771799378126267, + "grad_norm": 6.643670558929443, + "learning_rate": 2.9999265140297276e-05, + "loss": 0.186279296875, + "step": 558 + }, + { + "epoch": 0.037785588752196834, + "grad_norm": 5.115291595458984, + "learning_rate": 2.999923211725412e-05, + "loss": 0.15850830078125, + "step": 559 + }, + { + "epoch": 0.037853183723131, + "grad_norm": 4.5053229331970215, + "learning_rate": 2.999919836845484e-05, + "loss": 0.0749969482421875, + "step": 560 + }, + { + "epoch": 0.03792077869406516, + "grad_norm": 3.1784040927886963, + "learning_rate": 2.9999163893901057e-05, + "loss": 0.241119384765625, + "step": 561 + }, + { + "epoch": 0.037988373664999325, + "grad_norm": 0.8416277766227722, + "learning_rate": 2.9999128693594456e-05, + "loss": 0.09881591796875, + "step": 562 + }, + { + "epoch": 0.03805596863593349, + "grad_norm": 1.9229607582092285, + "learning_rate": 2.999909276753672e-05, + "loss": 0.12961578369140625, + "step": 563 + }, + { + "epoch": 0.038123563606867646, + "grad_norm": 0.9888301491737366, + "learning_rate": 2.9999056115729605e-05, + "loss": 0.079620361328125, + "step": 564 + }, + { + "epoch": 0.03819115857780181, + "grad_norm": 1.8456045389175415, + "learning_rate": 2.9999018738174873e-05, + "loss": 0.07819747924804688, + "step": 565 + }, + { + "epoch": 0.03825875354873597, + "grad_norm": 1.1665289402008057, + "learning_rate": 2.999898063487434e-05, + "loss": 0.106475830078125, + "step": 566 + }, + { + "epoch": 0.038326348519670136, + "grad_norm": 1.5903397798538208, + "learning_rate": 2.9998941805829836e-05, + "loss": 0.1674346923828125, + "step": 567 + }, + { + "epoch": 0.0383939434906043, + "grad_norm": 0.9152625799179077, + "learning_rate": 2.9998902251043263e-05, + "loss": 0.1081085205078125, + "step": 568 + }, + { + "epoch": 0.038461538461538464, + "grad_norm": 3.042475700378418, + "learning_rate": 2.999886197051652e-05, + "loss": 0.1288604736328125, + "step": 569 + }, + { + "epoch": 0.03852913343247263, + "grad_norm": 1.91112220287323, + "learning_rate": 2.999882096425155e-05, + "loss": 0.145751953125, + "step": 570 + }, + { + "epoch": 0.038596728403406784, + "grad_norm": 1.2738869190216064, + "learning_rate": 2.999877923225035e-05, + "loss": 0.1277008056640625, + "step": 571 + }, + { + "epoch": 0.03866432337434095, + "grad_norm": 1.7105028629302979, + "learning_rate": 2.9998736774514933e-05, + "loss": 0.095245361328125, + "step": 572 + }, + { + "epoch": 0.03873191834527511, + "grad_norm": 1.3043365478515625, + "learning_rate": 2.9998693591047358e-05, + "loss": 0.09619140625, + "step": 573 + }, + { + "epoch": 0.038799513316209275, + "grad_norm": 2.6658878326416016, + "learning_rate": 2.9998649681849707e-05, + "loss": 0.23172760009765625, + "step": 574 + }, + { + "epoch": 0.03886710828714344, + "grad_norm": 1.170517086982727, + "learning_rate": 2.999860504692411e-05, + "loss": 0.13622283935546875, + "step": 575 + }, + { + "epoch": 0.0389347032580776, + "grad_norm": 5.829751968383789, + "learning_rate": 2.999855968627273e-05, + "loss": 0.2633056640625, + "step": 576 + }, + { + "epoch": 0.03900229822901176, + "grad_norm": 2.6268463134765625, + "learning_rate": 2.9998513599897752e-05, + "loss": 0.11310577392578125, + "step": 577 + }, + { + "epoch": 0.03906989319994592, + "grad_norm": 0.45539626479148865, + "learning_rate": 2.999846678780141e-05, + "loss": 0.03356170654296875, + "step": 578 + }, + { + "epoch": 0.039137488170880086, + "grad_norm": 2.5685315132141113, + "learning_rate": 2.9998419249985975e-05, + "loss": 0.1523895263671875, + "step": 579 + }, + { + "epoch": 0.03920508314181425, + "grad_norm": 3.0577034950256348, + "learning_rate": 2.999837098645374e-05, + "loss": 0.1978607177734375, + "step": 580 + }, + { + "epoch": 0.03927267811274841, + "grad_norm": 2.095142364501953, + "learning_rate": 2.9998321997207045e-05, + "loss": 0.0693359375, + "step": 581 + }, + { + "epoch": 0.03934027308368258, + "grad_norm": 6.029660224914551, + "learning_rate": 2.9998272282248255e-05, + "loss": 0.19866943359375, + "step": 582 + }, + { + "epoch": 0.03940786805461673, + "grad_norm": 6.7904133796691895, + "learning_rate": 2.999822184157978e-05, + "loss": 0.208099365234375, + "step": 583 + }, + { + "epoch": 0.0394754630255509, + "grad_norm": 1.465585470199585, + "learning_rate": 2.9998170675204062e-05, + "loss": 0.10870361328125, + "step": 584 + }, + { + "epoch": 0.03954305799648506, + "grad_norm": 2.533151388168335, + "learning_rate": 2.9998118783123572e-05, + "loss": 0.1712188720703125, + "step": 585 + }, + { + "epoch": 0.039610652967419224, + "grad_norm": 1.4378745555877686, + "learning_rate": 2.999806616534082e-05, + "loss": 0.067718505859375, + "step": 586 + }, + { + "epoch": 0.03967824793835339, + "grad_norm": 1.4923158884048462, + "learning_rate": 2.9998012821858358e-05, + "loss": 0.093048095703125, + "step": 587 + }, + { + "epoch": 0.03974584290928755, + "grad_norm": 1.2093175649642944, + "learning_rate": 2.9997958752678767e-05, + "loss": 0.06381988525390625, + "step": 588 + }, + { + "epoch": 0.03981343788022171, + "grad_norm": 5.948870658874512, + "learning_rate": 2.9997903957804654e-05, + "loss": 0.211395263671875, + "step": 589 + }, + { + "epoch": 0.03988103285115587, + "grad_norm": 6.050867080688477, + "learning_rate": 2.999784843723868e-05, + "loss": 0.21475982666015625, + "step": 590 + }, + { + "epoch": 0.039948627822090035, + "grad_norm": 1.9961919784545898, + "learning_rate": 2.9997792190983525e-05, + "loss": 0.2201690673828125, + "step": 591 + }, + { + "epoch": 0.0400162227930242, + "grad_norm": 1.0976548194885254, + "learning_rate": 2.9997735219041922e-05, + "loss": 0.148834228515625, + "step": 592 + }, + { + "epoch": 0.04008381776395836, + "grad_norm": 5.608518600463867, + "learning_rate": 2.9997677521416612e-05, + "loss": 0.22216796875, + "step": 593 + }, + { + "epoch": 0.040151412734892526, + "grad_norm": 3.4290688037872314, + "learning_rate": 2.9997619098110394e-05, + "loss": 0.209747314453125, + "step": 594 + }, + { + "epoch": 0.04021900770582669, + "grad_norm": 6.896137714385986, + "learning_rate": 2.9997559949126093e-05, + "loss": 0.2867431640625, + "step": 595 + }, + { + "epoch": 0.040286602676760846, + "grad_norm": 2.5790278911590576, + "learning_rate": 2.999750007446658e-05, + "loss": 0.333740234375, + "step": 596 + }, + { + "epoch": 0.04035419764769501, + "grad_norm": 1.8087307214736938, + "learning_rate": 2.9997439474134738e-05, + "loss": 0.15673828125, + "step": 597 + }, + { + "epoch": 0.040421792618629174, + "grad_norm": 0.971864640712738, + "learning_rate": 2.999737814813351e-05, + "loss": 0.10166168212890625, + "step": 598 + }, + { + "epoch": 0.04048938758956334, + "grad_norm": 1.1200042963027954, + "learning_rate": 2.999731609646586e-05, + "loss": 0.1481170654296875, + "step": 599 + }, + { + "epoch": 0.0405569825604975, + "grad_norm": 0.5449658632278442, + "learning_rate": 2.999725331913479e-05, + "loss": 0.0828094482421875, + "step": 600 + }, + { + "epoch": 0.040624577531431665, + "grad_norm": 0.5190967917442322, + "learning_rate": 2.9997189816143334e-05, + "loss": 0.04819488525390625, + "step": 601 + }, + { + "epoch": 0.04069217250236582, + "grad_norm": 4.724027156829834, + "learning_rate": 2.999712558749457e-05, + "loss": 0.37933349609375, + "step": 602 + }, + { + "epoch": 0.040759767473299985, + "grad_norm": 3.786332130432129, + "learning_rate": 2.9997060633191604e-05, + "loss": 0.14807891845703125, + "step": 603 + }, + { + "epoch": 0.04082736244423415, + "grad_norm": 2.145217180252075, + "learning_rate": 2.9996994953237578e-05, + "loss": 0.170989990234375, + "step": 604 + }, + { + "epoch": 0.04089495741516831, + "grad_norm": 2.7757837772369385, + "learning_rate": 2.9996928547635675e-05, + "loss": 0.1333465576171875, + "step": 605 + }, + { + "epoch": 0.040962552386102476, + "grad_norm": 3.390260696411133, + "learning_rate": 2.9996861416389102e-05, + "loss": 0.1108551025390625, + "step": 606 + }, + { + "epoch": 0.04103014735703664, + "grad_norm": 0.9660534858703613, + "learning_rate": 2.9996793559501106e-05, + "loss": 0.19842529296875, + "step": 607 + }, + { + "epoch": 0.041097742327970796, + "grad_norm": 0.9646946787834167, + "learning_rate": 2.9996724976974973e-05, + "loss": 0.09221649169921875, + "step": 608 + }, + { + "epoch": 0.04116533729890496, + "grad_norm": 1.3567506074905396, + "learning_rate": 2.9996655668814027e-05, + "loss": 0.1533203125, + "step": 609 + }, + { + "epoch": 0.04123293226983912, + "grad_norm": 0.47127199172973633, + "learning_rate": 2.9996585635021613e-05, + "loss": 0.0546875, + "step": 610 + }, + { + "epoch": 0.04130052724077329, + "grad_norm": 0.6610764861106873, + "learning_rate": 2.9996514875601124e-05, + "loss": 0.0869903564453125, + "step": 611 + }, + { + "epoch": 0.04136812221170745, + "grad_norm": 5.175927639007568, + "learning_rate": 2.9996443390555986e-05, + "loss": 0.259002685546875, + "step": 612 + }, + { + "epoch": 0.041435717182641614, + "grad_norm": 3.7743046283721924, + "learning_rate": 2.9996371179889652e-05, + "loss": 0.241302490234375, + "step": 613 + }, + { + "epoch": 0.04150331215357577, + "grad_norm": 0.7145547270774841, + "learning_rate": 2.9996298243605616e-05, + "loss": 0.1033172607421875, + "step": 614 + }, + { + "epoch": 0.041570907124509934, + "grad_norm": 1.0458157062530518, + "learning_rate": 2.9996224581707413e-05, + "loss": 0.09912109375, + "step": 615 + }, + { + "epoch": 0.0416385020954441, + "grad_norm": 1.4104902744293213, + "learning_rate": 2.9996150194198606e-05, + "loss": 0.1150054931640625, + "step": 616 + }, + { + "epoch": 0.04170609706637826, + "grad_norm": 1.4184973239898682, + "learning_rate": 2.9996075081082792e-05, + "loss": 0.21630859375, + "step": 617 + }, + { + "epoch": 0.041773692037312425, + "grad_norm": 2.705051898956299, + "learning_rate": 2.99959992423636e-05, + "loss": 0.20452880859375, + "step": 618 + }, + { + "epoch": 0.04184128700824659, + "grad_norm": 0.9395461678504944, + "learning_rate": 2.9995922678044713e-05, + "loss": 0.1195831298828125, + "step": 619 + }, + { + "epoch": 0.04190888197918075, + "grad_norm": 5.142083644866943, + "learning_rate": 2.9995845388129827e-05, + "loss": 0.3070068359375, + "step": 620 + }, + { + "epoch": 0.04197647695011491, + "grad_norm": 1.686903476715088, + "learning_rate": 2.999576737262268e-05, + "loss": 0.201202392578125, + "step": 621 + }, + { + "epoch": 0.04204407192104907, + "grad_norm": 3.524895191192627, + "learning_rate": 2.999568863152705e-05, + "loss": 0.16937255859375, + "step": 622 + }, + { + "epoch": 0.042111666891983236, + "grad_norm": 1.1167711019515991, + "learning_rate": 2.999560916484675e-05, + "loss": 0.22357177734375, + "step": 623 + }, + { + "epoch": 0.0421792618629174, + "grad_norm": 2.4972798824310303, + "learning_rate": 2.9995528972585623e-05, + "loss": 0.26141357421875, + "step": 624 + }, + { + "epoch": 0.042246856833851563, + "grad_norm": 1.582486867904663, + "learning_rate": 2.9995448054747543e-05, + "loss": 0.0858001708984375, + "step": 625 + }, + { + "epoch": 0.04231445180478573, + "grad_norm": 1.335440754890442, + "learning_rate": 2.9995366411336432e-05, + "loss": 0.1627197265625, + "step": 626 + }, + { + "epoch": 0.042382046775719884, + "grad_norm": 1.4878231287002563, + "learning_rate": 2.999528404235624e-05, + "loss": 0.122467041015625, + "step": 627 + }, + { + "epoch": 0.04244964174665405, + "grad_norm": 1.551517128944397, + "learning_rate": 2.999520094781095e-05, + "loss": 0.055423736572265625, + "step": 628 + }, + { + "epoch": 0.04251723671758821, + "grad_norm": 7.017040729522705, + "learning_rate": 2.9995117127704582e-05, + "loss": 0.2899169921875, + "step": 629 + }, + { + "epoch": 0.042584831688522375, + "grad_norm": 0.8104062080383301, + "learning_rate": 2.99950325820412e-05, + "loss": 0.06592559814453125, + "step": 630 + }, + { + "epoch": 0.04265242665945654, + "grad_norm": 2.6087536811828613, + "learning_rate": 2.9994947310824884e-05, + "loss": 0.22599029541015625, + "step": 631 + }, + { + "epoch": 0.0427200216303907, + "grad_norm": 3.7038626670837402, + "learning_rate": 2.9994861314059766e-05, + "loss": 0.192962646484375, + "step": 632 + }, + { + "epoch": 0.04278761660132486, + "grad_norm": 1.8107455968856812, + "learning_rate": 2.9994774591750002e-05, + "loss": 0.2213134765625, + "step": 633 + }, + { + "epoch": 0.04285521157225902, + "grad_norm": 1.044893741607666, + "learning_rate": 2.99946871438998e-05, + "loss": 0.0982208251953125, + "step": 634 + }, + { + "epoch": 0.042922806543193186, + "grad_norm": 6.845047950744629, + "learning_rate": 2.9994598970513377e-05, + "loss": 0.259033203125, + "step": 635 + }, + { + "epoch": 0.04299040151412735, + "grad_norm": 1.644531488418579, + "learning_rate": 2.9994510071595002e-05, + "loss": 0.11474227905273438, + "step": 636 + }, + { + "epoch": 0.04305799648506151, + "grad_norm": 2.680795192718506, + "learning_rate": 2.9994420447148988e-05, + "loss": 0.306396484375, + "step": 637 + }, + { + "epoch": 0.04312559145599568, + "grad_norm": 4.967638969421387, + "learning_rate": 2.9994330097179657e-05, + "loss": 0.1942138671875, + "step": 638 + }, + { + "epoch": 0.04319318642692983, + "grad_norm": 2.1872169971466064, + "learning_rate": 2.9994239021691394e-05, + "loss": 0.09991455078125, + "step": 639 + }, + { + "epoch": 0.043260781397864, + "grad_norm": 4.678313732147217, + "learning_rate": 2.9994147220688593e-05, + "loss": 0.1345062255859375, + "step": 640 + }, + { + "epoch": 0.04332837636879816, + "grad_norm": 5.565011024475098, + "learning_rate": 2.9994054694175703e-05, + "loss": 0.28887939453125, + "step": 641 + }, + { + "epoch": 0.043395971339732324, + "grad_norm": 2.215383291244507, + "learning_rate": 2.9993961442157203e-05, + "loss": 0.236236572265625, + "step": 642 + }, + { + "epoch": 0.04346356631066649, + "grad_norm": 1.3952069282531738, + "learning_rate": 2.9993867464637596e-05, + "loss": 0.091400146484375, + "step": 643 + }, + { + "epoch": 0.04353116128160065, + "grad_norm": 0.3855605721473694, + "learning_rate": 2.9993772761621442e-05, + "loss": 0.02733612060546875, + "step": 644 + }, + { + "epoch": 0.04359875625253481, + "grad_norm": 1.589082956314087, + "learning_rate": 2.9993677333113312e-05, + "loss": 0.21820068359375, + "step": 645 + }, + { + "epoch": 0.04366635122346897, + "grad_norm": 0.5457358360290527, + "learning_rate": 2.9993581179117827e-05, + "loss": 0.09429931640625, + "step": 646 + }, + { + "epoch": 0.043733946194403135, + "grad_norm": 1.4612857103347778, + "learning_rate": 2.9993484299639646e-05, + "loss": 0.21929931640625, + "step": 647 + }, + { + "epoch": 0.0438015411653373, + "grad_norm": 0.5436586141586304, + "learning_rate": 2.9993386694683445e-05, + "loss": 0.07379150390625, + "step": 648 + }, + { + "epoch": 0.04386913613627146, + "grad_norm": 1.3588247299194336, + "learning_rate": 2.9993288364253956e-05, + "loss": 0.058528900146484375, + "step": 649 + }, + { + "epoch": 0.043936731107205626, + "grad_norm": 2.496009588241577, + "learning_rate": 2.9993189308355933e-05, + "loss": 0.2073822021484375, + "step": 650 + }, + { + "epoch": 0.04400432607813979, + "grad_norm": 0.6257890462875366, + "learning_rate": 2.999308952699417e-05, + "loss": 0.04880523681640625, + "step": 651 + }, + { + "epoch": 0.044071921049073946, + "grad_norm": 0.9141761064529419, + "learning_rate": 2.9992989020173495e-05, + "loss": 0.131591796875, + "step": 652 + }, + { + "epoch": 0.04413951602000811, + "grad_norm": 1.063555121421814, + "learning_rate": 2.9992887787898772e-05, + "loss": 0.151641845703125, + "step": 653 + }, + { + "epoch": 0.044207110990942274, + "grad_norm": 2.0242061614990234, + "learning_rate": 2.9992785830174895e-05, + "loss": 0.172882080078125, + "step": 654 + }, + { + "epoch": 0.04427470596187644, + "grad_norm": 3.1551852226257324, + "learning_rate": 2.99926831470068e-05, + "loss": 0.22296142578125, + "step": 655 + }, + { + "epoch": 0.0443423009328106, + "grad_norm": 5.816063404083252, + "learning_rate": 2.999257973839946e-05, + "loss": 0.26190185546875, + "step": 656 + }, + { + "epoch": 0.044409895903744764, + "grad_norm": 0.6147264838218689, + "learning_rate": 2.9992475604357872e-05, + "loss": 0.09088134765625, + "step": 657 + }, + { + "epoch": 0.04447749087467892, + "grad_norm": 3.6151139736175537, + "learning_rate": 2.9992370744887078e-05, + "loss": 0.31097412109375, + "step": 658 + }, + { + "epoch": 0.044545085845613085, + "grad_norm": 0.6039172410964966, + "learning_rate": 2.999226515999215e-05, + "loss": 0.04837799072265625, + "step": 659 + }, + { + "epoch": 0.04461268081654725, + "grad_norm": 1.5181437730789185, + "learning_rate": 2.9992158849678196e-05, + "loss": 0.09922027587890625, + "step": 660 + }, + { + "epoch": 0.04468027578748141, + "grad_norm": 1.837937355041504, + "learning_rate": 2.9992051813950364e-05, + "loss": 0.191680908203125, + "step": 661 + }, + { + "epoch": 0.044747870758415575, + "grad_norm": 1.6884273290634155, + "learning_rate": 2.9991944052813833e-05, + "loss": 0.12548828125, + "step": 662 + }, + { + "epoch": 0.04481546572934974, + "grad_norm": 0.8294028043746948, + "learning_rate": 2.9991835566273806e-05, + "loss": 0.0732269287109375, + "step": 663 + }, + { + "epoch": 0.044883060700283896, + "grad_norm": 5.481712818145752, + "learning_rate": 2.9991726354335546e-05, + "loss": 0.1942138671875, + "step": 664 + }, + { + "epoch": 0.04495065567121806, + "grad_norm": 3.4908294677734375, + "learning_rate": 2.9991616417004335e-05, + "loss": 0.1374359130859375, + "step": 665 + }, + { + "epoch": 0.04501825064215222, + "grad_norm": 1.2023183107376099, + "learning_rate": 2.999150575428549e-05, + "loss": 0.07750701904296875, + "step": 666 + }, + { + "epoch": 0.04508584561308639, + "grad_norm": 3.0045814514160156, + "learning_rate": 2.9991394366184363e-05, + "loss": 0.183319091796875, + "step": 667 + }, + { + "epoch": 0.04515344058402055, + "grad_norm": 0.9485216736793518, + "learning_rate": 2.9991282252706347e-05, + "loss": 0.1080780029296875, + "step": 668 + }, + { + "epoch": 0.045221035554954714, + "grad_norm": 1.1388517618179321, + "learning_rate": 2.9991169413856866e-05, + "loss": 0.1069488525390625, + "step": 669 + }, + { + "epoch": 0.04528863052588887, + "grad_norm": 2.8197739124298096, + "learning_rate": 2.999105584964138e-05, + "loss": 0.25775146484375, + "step": 670 + }, + { + "epoch": 0.045356225496823034, + "grad_norm": 2.553666114807129, + "learning_rate": 2.999094156006538e-05, + "loss": 0.1114959716796875, + "step": 671 + }, + { + "epoch": 0.0454238204677572, + "grad_norm": 3.7600884437561035, + "learning_rate": 2.9990826545134405e-05, + "loss": 0.196136474609375, + "step": 672 + }, + { + "epoch": 0.04549141543869136, + "grad_norm": 0.8074367642402649, + "learning_rate": 2.999071080485401e-05, + "loss": 0.0748291015625, + "step": 673 + }, + { + "epoch": 0.045559010409625525, + "grad_norm": 0.5439823865890503, + "learning_rate": 2.9990594339229805e-05, + "loss": 0.11681365966796875, + "step": 674 + }, + { + "epoch": 0.04562660538055969, + "grad_norm": 3.8376853466033936, + "learning_rate": 2.9990477148267416e-05, + "loss": 0.2442779541015625, + "step": 675 + }, + { + "epoch": 0.04569420035149385, + "grad_norm": 1.8159642219543457, + "learning_rate": 2.9990359231972517e-05, + "loss": 0.08238983154296875, + "step": 676 + }, + { + "epoch": 0.04576179532242801, + "grad_norm": 2.358569860458374, + "learning_rate": 2.9990240590350816e-05, + "loss": 0.30096435546875, + "step": 677 + }, + { + "epoch": 0.04582939029336217, + "grad_norm": 0.9774882197380066, + "learning_rate": 2.9990121223408054e-05, + "loss": 0.22418212890625, + "step": 678 + }, + { + "epoch": 0.045896985264296336, + "grad_norm": 0.6792570352554321, + "learning_rate": 2.9990001131150003e-05, + "loss": 0.060695648193359375, + "step": 679 + }, + { + "epoch": 0.0459645802352305, + "grad_norm": 0.9476651549339294, + "learning_rate": 2.9989880313582477e-05, + "loss": 0.09035491943359375, + "step": 680 + }, + { + "epoch": 0.04603217520616466, + "grad_norm": 0.3282974660396576, + "learning_rate": 2.998975877071132e-05, + "loss": 0.035175323486328125, + "step": 681 + }, + { + "epoch": 0.04609977017709883, + "grad_norm": 1.8305168151855469, + "learning_rate": 2.998963650254241e-05, + "loss": 0.20367431640625, + "step": 682 + }, + { + "epoch": 0.046167365148032984, + "grad_norm": 2.2691144943237305, + "learning_rate": 2.9989513509081672e-05, + "loss": 0.13964080810546875, + "step": 683 + }, + { + "epoch": 0.04623496011896715, + "grad_norm": 5.941413879394531, + "learning_rate": 2.9989389790335043e-05, + "loss": 0.271575927734375, + "step": 684 + }, + { + "epoch": 0.04630255508990131, + "grad_norm": 0.7804389595985413, + "learning_rate": 2.9989265346308528e-05, + "loss": 0.1117095947265625, + "step": 685 + }, + { + "epoch": 0.046370150060835474, + "grad_norm": 1.889501690864563, + "learning_rate": 2.9989140177008134e-05, + "loss": 0.173736572265625, + "step": 686 + }, + { + "epoch": 0.04643774503176964, + "grad_norm": 5.097761154174805, + "learning_rate": 2.998901428243992e-05, + "loss": 0.25439453125, + "step": 687 + }, + { + "epoch": 0.0465053400027038, + "grad_norm": 2.0263984203338623, + "learning_rate": 2.9988887662609983e-05, + "loss": 0.160736083984375, + "step": 688 + }, + { + "epoch": 0.04657293497363796, + "grad_norm": 0.9918971061706543, + "learning_rate": 2.9988760317524443e-05, + "loss": 0.0804290771484375, + "step": 689 + }, + { + "epoch": 0.04664052994457212, + "grad_norm": 2.1114110946655273, + "learning_rate": 2.9988632247189466e-05, + "loss": 0.1050262451171875, + "step": 690 + }, + { + "epoch": 0.046708124915506286, + "grad_norm": 5.271144390106201, + "learning_rate": 2.998850345161125e-05, + "loss": 0.24810791015625, + "step": 691 + }, + { + "epoch": 0.04677571988644045, + "grad_norm": 0.595212459564209, + "learning_rate": 2.9988373930796017e-05, + "loss": 0.03955078125, + "step": 692 + }, + { + "epoch": 0.04684331485737461, + "grad_norm": 1.927976369857788, + "learning_rate": 2.9988243684750047e-05, + "loss": 0.1566009521484375, + "step": 693 + }, + { + "epoch": 0.046910909828308776, + "grad_norm": 2.2192440032958984, + "learning_rate": 2.9988112713479637e-05, + "loss": 0.154693603515625, + "step": 694 + }, + { + "epoch": 0.04697850479924293, + "grad_norm": 0.6432027816772461, + "learning_rate": 2.9987981016991125e-05, + "loss": 0.05022430419921875, + "step": 695 + }, + { + "epoch": 0.0470460997701771, + "grad_norm": 2.913339614868164, + "learning_rate": 2.998784859529088e-05, + "loss": 0.26513671875, + "step": 696 + }, + { + "epoch": 0.04711369474111126, + "grad_norm": 1.1951630115509033, + "learning_rate": 2.998771544838531e-05, + "loss": 0.152984619140625, + "step": 697 + }, + { + "epoch": 0.047181289712045424, + "grad_norm": 1.17485773563385, + "learning_rate": 2.998758157628086e-05, + "loss": 0.11492919921875, + "step": 698 + }, + { + "epoch": 0.04724888468297959, + "grad_norm": 1.869964599609375, + "learning_rate": 2.9987446978984007e-05, + "loss": 0.152099609375, + "step": 699 + }, + { + "epoch": 0.04731647965391375, + "grad_norm": 0.3565690815448761, + "learning_rate": 2.998731165650126e-05, + "loss": 0.05519866943359375, + "step": 700 + }, + { + "epoch": 0.047384074624847915, + "grad_norm": 1.7244020700454712, + "learning_rate": 2.9987175608839176e-05, + "loss": 0.185699462890625, + "step": 701 + }, + { + "epoch": 0.04745166959578207, + "grad_norm": 2.400352716445923, + "learning_rate": 2.9987038836004328e-05, + "loss": 0.1707763671875, + "step": 702 + }, + { + "epoch": 0.047519264566716235, + "grad_norm": 1.2897359132766724, + "learning_rate": 2.998690133800334e-05, + "loss": 0.0645904541015625, + "step": 703 + }, + { + "epoch": 0.0475868595376504, + "grad_norm": 1.308562159538269, + "learning_rate": 2.998676311484286e-05, + "loss": 0.184051513671875, + "step": 704 + }, + { + "epoch": 0.04765445450858456, + "grad_norm": 1.852209210395813, + "learning_rate": 2.9986624166529582e-05, + "loss": 0.2381591796875, + "step": 705 + }, + { + "epoch": 0.047722049479518726, + "grad_norm": 0.6582621932029724, + "learning_rate": 2.9986484493070226e-05, + "loss": 0.0594329833984375, + "step": 706 + }, + { + "epoch": 0.04778964445045289, + "grad_norm": 1.3969374895095825, + "learning_rate": 2.9986344094471545e-05, + "loss": 0.156463623046875, + "step": 707 + }, + { + "epoch": 0.047857239421387046, + "grad_norm": 2.2738630771636963, + "learning_rate": 2.9986202970740344e-05, + "loss": 0.28802490234375, + "step": 708 + }, + { + "epoch": 0.04792483439232121, + "grad_norm": 0.7051920890808105, + "learning_rate": 2.998606112188344e-05, + "loss": 0.0460052490234375, + "step": 709 + }, + { + "epoch": 0.04799242936325537, + "grad_norm": 2.412787914276123, + "learning_rate": 2.9985918547907703e-05, + "loss": 0.17945098876953125, + "step": 710 + }, + { + "epoch": 0.04806002433418954, + "grad_norm": 1.4123775959014893, + "learning_rate": 2.9985775248820033e-05, + "loss": 0.0631561279296875, + "step": 711 + }, + { + "epoch": 0.0481276193051237, + "grad_norm": 2.553199529647827, + "learning_rate": 2.9985631224627355e-05, + "loss": 0.15838623046875, + "step": 712 + }, + { + "epoch": 0.048195214276057864, + "grad_norm": 1.6653141975402832, + "learning_rate": 2.998548647533665e-05, + "loss": 0.21832275390625, + "step": 713 + }, + { + "epoch": 0.04826280924699202, + "grad_norm": 2.0533864498138428, + "learning_rate": 2.9985341000954916e-05, + "loss": 0.24365234375, + "step": 714 + }, + { + "epoch": 0.048330404217926184, + "grad_norm": 0.5530652403831482, + "learning_rate": 2.9985194801489188e-05, + "loss": 0.04839324951171875, + "step": 715 + }, + { + "epoch": 0.04839799918886035, + "grad_norm": 0.9529194235801697, + "learning_rate": 2.9985047876946543e-05, + "loss": 0.12735366821289062, + "step": 716 + }, + { + "epoch": 0.04846559415979451, + "grad_norm": 1.9679453372955322, + "learning_rate": 2.9984900227334093e-05, + "loss": 0.07374191284179688, + "step": 717 + }, + { + "epoch": 0.048533189130728675, + "grad_norm": 1.4662069082260132, + "learning_rate": 2.998475185265898e-05, + "loss": 0.0842132568359375, + "step": 718 + }, + { + "epoch": 0.04860078410166284, + "grad_norm": 2.192143440246582, + "learning_rate": 2.998460275292838e-05, + "loss": 0.14568710327148438, + "step": 719 + }, + { + "epoch": 0.048668379072596996, + "grad_norm": 1.958990454673767, + "learning_rate": 2.9984452928149514e-05, + "loss": 0.10567474365234375, + "step": 720 + }, + { + "epoch": 0.04873597404353116, + "grad_norm": 2.984832525253296, + "learning_rate": 2.9984302378329623e-05, + "loss": 0.20819091796875, + "step": 721 + }, + { + "epoch": 0.04880356901446532, + "grad_norm": 3.579226016998291, + "learning_rate": 2.9984151103476004e-05, + "loss": 0.2218780517578125, + "step": 722 + }, + { + "epoch": 0.048871163985399486, + "grad_norm": 4.486825942993164, + "learning_rate": 2.9983999103595965e-05, + "loss": 0.18950653076171875, + "step": 723 + }, + { + "epoch": 0.04893875895633365, + "grad_norm": 2.377448081970215, + "learning_rate": 2.9983846378696865e-05, + "loss": 0.210418701171875, + "step": 724 + }, + { + "epoch": 0.049006353927267814, + "grad_norm": 2.092395544052124, + "learning_rate": 2.9983692928786094e-05, + "loss": 0.2496795654296875, + "step": 725 + }, + { + "epoch": 0.04907394889820197, + "grad_norm": 1.1269516944885254, + "learning_rate": 2.998353875387108e-05, + "loss": 0.08303546905517578, + "step": 726 + }, + { + "epoch": 0.049141543869136134, + "grad_norm": 4.097463130950928, + "learning_rate": 2.9983383853959273e-05, + "loss": 0.232147216796875, + "step": 727 + }, + { + "epoch": 0.0492091388400703, + "grad_norm": 2.835012674331665, + "learning_rate": 2.9983228229058178e-05, + "loss": 0.213134765625, + "step": 728 + }, + { + "epoch": 0.04927673381100446, + "grad_norm": 6.500109672546387, + "learning_rate": 2.9983071879175322e-05, + "loss": 0.18930816650390625, + "step": 729 + }, + { + "epoch": 0.049344328781938625, + "grad_norm": 0.8616926670074463, + "learning_rate": 2.9982914804318267e-05, + "loss": 0.12651824951171875, + "step": 730 + }, + { + "epoch": 0.04941192375287279, + "grad_norm": 2.958159923553467, + "learning_rate": 2.9982757004494617e-05, + "loss": 0.16909027099609375, + "step": 731 + }, + { + "epoch": 0.04947951872380695, + "grad_norm": 0.41564998030662537, + "learning_rate": 2.998259847971201e-05, + "loss": 0.04192352294921875, + "step": 732 + }, + { + "epoch": 0.04954711369474111, + "grad_norm": 0.8122474551200867, + "learning_rate": 2.998243922997811e-05, + "loss": 0.195648193359375, + "step": 733 + }, + { + "epoch": 0.04961470866567527, + "grad_norm": 0.9609103798866272, + "learning_rate": 2.9982279255300628e-05, + "loss": 0.07404327392578125, + "step": 734 + }, + { + "epoch": 0.049682303636609436, + "grad_norm": 1.9051284790039062, + "learning_rate": 2.9982118555687296e-05, + "loss": 0.2296142578125, + "step": 735 + }, + { + "epoch": 0.0497498986075436, + "grad_norm": 1.6175179481506348, + "learning_rate": 2.9981957131145895e-05, + "loss": 0.227569580078125, + "step": 736 + }, + { + "epoch": 0.04981749357847776, + "grad_norm": 1.4420385360717773, + "learning_rate": 2.9981794981684245e-05, + "loss": 0.16048431396484375, + "step": 737 + }, + { + "epoch": 0.04988508854941193, + "grad_norm": 2.3724558353424072, + "learning_rate": 2.9981632107310176e-05, + "loss": 0.2991943359375, + "step": 738 + }, + { + "epoch": 0.04995268352034608, + "grad_norm": 0.4480683207511902, + "learning_rate": 2.998146850803158e-05, + "loss": 0.098114013671875, + "step": 739 + }, + { + "epoch": 0.05002027849128025, + "grad_norm": 0.6670964956283569, + "learning_rate": 2.9981304183856366e-05, + "loss": 0.127899169921875, + "step": 740 + }, + { + "epoch": 0.05008787346221441, + "grad_norm": 3.2053420543670654, + "learning_rate": 2.9981139134792486e-05, + "loss": 0.2078094482421875, + "step": 741 + }, + { + "epoch": 0.050155468433148574, + "grad_norm": 1.4474420547485352, + "learning_rate": 2.998097336084793e-05, + "loss": 0.1575927734375, + "step": 742 + }, + { + "epoch": 0.05022306340408274, + "grad_norm": 1.4878344535827637, + "learning_rate": 2.998080686203072e-05, + "loss": 0.14337158203125, + "step": 743 + }, + { + "epoch": 0.0502906583750169, + "grad_norm": 0.7335812449455261, + "learning_rate": 2.9980639638348904e-05, + "loss": 0.08219146728515625, + "step": 744 + }, + { + "epoch": 0.05035825334595106, + "grad_norm": 1.8993242979049683, + "learning_rate": 2.9980471689810584e-05, + "loss": 0.13887786865234375, + "step": 745 + }, + { + "epoch": 0.05042584831688522, + "grad_norm": 5.487666130065918, + "learning_rate": 2.9980303016423877e-05, + "loss": 0.21343994140625, + "step": 746 + }, + { + "epoch": 0.050493443287819385, + "grad_norm": 0.8579437732696533, + "learning_rate": 2.998013361819695e-05, + "loss": 0.0528411865234375, + "step": 747 + }, + { + "epoch": 0.05056103825875355, + "grad_norm": 0.522335946559906, + "learning_rate": 2.9979963495137998e-05, + "loss": 0.04938507080078125, + "step": 748 + }, + { + "epoch": 0.05062863322968771, + "grad_norm": 1.4038749933242798, + "learning_rate": 2.9979792647255255e-05, + "loss": 0.173309326171875, + "step": 749 + }, + { + "epoch": 0.050696228200621876, + "grad_norm": 1.9262702465057373, + "learning_rate": 2.9979621074556983e-05, + "loss": 0.2718505859375, + "step": 750 + }, + { + "epoch": 0.05076382317155603, + "grad_norm": 0.7928689122200012, + "learning_rate": 2.9979448777051483e-05, + "loss": 0.1022491455078125, + "step": 751 + }, + { + "epoch": 0.050831418142490196, + "grad_norm": 1.661816954612732, + "learning_rate": 2.99792757547471e-05, + "loss": 0.205780029296875, + "step": 752 + }, + { + "epoch": 0.05089901311342436, + "grad_norm": 1.3144981861114502, + "learning_rate": 2.9979102007652202e-05, + "loss": 0.1789703369140625, + "step": 753 + }, + { + "epoch": 0.050966608084358524, + "grad_norm": 1.7416739463806152, + "learning_rate": 2.997892753577519e-05, + "loss": 0.0899505615234375, + "step": 754 + }, + { + "epoch": 0.05103420305529269, + "grad_norm": 2.2958550453186035, + "learning_rate": 2.9978752339124515e-05, + "loss": 0.210723876953125, + "step": 755 + }, + { + "epoch": 0.05110179802622685, + "grad_norm": 1.0415318012237549, + "learning_rate": 2.997857641770865e-05, + "loss": 0.08568572998046875, + "step": 756 + }, + { + "epoch": 0.051169392997161015, + "grad_norm": 1.6721125841140747, + "learning_rate": 2.9978399771536106e-05, + "loss": 0.0626220703125, + "step": 757 + }, + { + "epoch": 0.05123698796809517, + "grad_norm": 2.133713960647583, + "learning_rate": 2.9978222400615427e-05, + "loss": 0.19293212890625, + "step": 758 + }, + { + "epoch": 0.051304582939029335, + "grad_norm": 0.8121666312217712, + "learning_rate": 2.9978044304955202e-05, + "loss": 0.080230712890625, + "step": 759 + }, + { + "epoch": 0.0513721779099635, + "grad_norm": 1.9357140064239502, + "learning_rate": 2.9977865484564046e-05, + "loss": 0.1751251220703125, + "step": 760 + }, + { + "epoch": 0.05143977288089766, + "grad_norm": 2.2059619426727295, + "learning_rate": 2.9977685939450617e-05, + "loss": 0.1835174560546875, + "step": 761 + }, + { + "epoch": 0.051507367851831826, + "grad_norm": 0.7399638295173645, + "learning_rate": 2.997750566962359e-05, + "loss": 0.06817626953125, + "step": 762 + }, + { + "epoch": 0.05157496282276599, + "grad_norm": 0.9946776628494263, + "learning_rate": 2.9977324675091697e-05, + "loss": 0.0719757080078125, + "step": 763 + }, + { + "epoch": 0.051642557793700146, + "grad_norm": 0.7918753027915955, + "learning_rate": 2.9977142955863697e-05, + "loss": 0.135528564453125, + "step": 764 + }, + { + "epoch": 0.05171015276463431, + "grad_norm": 0.4469936788082123, + "learning_rate": 2.9976960511948375e-05, + "loss": 0.04674530029296875, + "step": 765 + }, + { + "epoch": 0.05177774773556847, + "grad_norm": 1.7443811893463135, + "learning_rate": 2.997677734335456e-05, + "loss": 0.1540985107421875, + "step": 766 + }, + { + "epoch": 0.05184534270650264, + "grad_norm": 2.6063644886016846, + "learning_rate": 2.9976593450091124e-05, + "loss": 0.24951171875, + "step": 767 + }, + { + "epoch": 0.0519129376774368, + "grad_norm": 2.1514410972595215, + "learning_rate": 2.997640883216696e-05, + "loss": 0.177093505859375, + "step": 768 + }, + { + "epoch": 0.051980532648370964, + "grad_norm": 1.2689915895462036, + "learning_rate": 2.997622348959099e-05, + "loss": 0.1782379150390625, + "step": 769 + }, + { + "epoch": 0.05204812761930512, + "grad_norm": 1.1663973331451416, + "learning_rate": 2.9976037422372197e-05, + "loss": 0.083831787109375, + "step": 770 + }, + { + "epoch": 0.052115722590239284, + "grad_norm": 2.425929546356201, + "learning_rate": 2.9975850630519578e-05, + "loss": 0.188201904296875, + "step": 771 + }, + { + "epoch": 0.05218331756117345, + "grad_norm": 1.9220999479293823, + "learning_rate": 2.997566311404217e-05, + "loss": 0.22662353515625, + "step": 772 + }, + { + "epoch": 0.05225091253210761, + "grad_norm": 1.0423612594604492, + "learning_rate": 2.997547487294905e-05, + "loss": 0.161376953125, + "step": 773 + }, + { + "epoch": 0.052318507503041775, + "grad_norm": 0.9252386093139648, + "learning_rate": 2.9975285907249326e-05, + "loss": 0.114654541015625, + "step": 774 + }, + { + "epoch": 0.05238610247397594, + "grad_norm": 3.2628707885742188, + "learning_rate": 2.9975096216952137e-05, + "loss": 0.212677001953125, + "step": 775 + }, + { + "epoch": 0.052453697444910095, + "grad_norm": 0.3435361087322235, + "learning_rate": 2.9974905802066662e-05, + "loss": 0.038402557373046875, + "step": 776 + }, + { + "epoch": 0.05252129241584426, + "grad_norm": 1.3578909635543823, + "learning_rate": 2.997471466260212e-05, + "loss": 0.20233154296875, + "step": 777 + }, + { + "epoch": 0.05258888738677842, + "grad_norm": 2.561923027038574, + "learning_rate": 2.9974522798567754e-05, + "loss": 0.1604461669921875, + "step": 778 + }, + { + "epoch": 0.052656482357712586, + "grad_norm": 3.206928014755249, + "learning_rate": 2.9974330209972848e-05, + "loss": 0.227447509765625, + "step": 779 + }, + { + "epoch": 0.05272407732864675, + "grad_norm": 1.0801254510879517, + "learning_rate": 2.9974136896826724e-05, + "loss": 0.069671630859375, + "step": 780 + }, + { + "epoch": 0.05279167229958091, + "grad_norm": 2.5935380458831787, + "learning_rate": 2.9973942859138735e-05, + "loss": 0.164520263671875, + "step": 781 + }, + { + "epoch": 0.05285926727051508, + "grad_norm": 1.7192434072494507, + "learning_rate": 2.9973748096918264e-05, + "loss": 0.2745361328125, + "step": 782 + }, + { + "epoch": 0.052926862241449234, + "grad_norm": 1.4753482341766357, + "learning_rate": 2.9973552610174745e-05, + "loss": 0.245269775390625, + "step": 783 + }, + { + "epoch": 0.0529944572123834, + "grad_norm": 2.1624927520751953, + "learning_rate": 2.9973356398917627e-05, + "loss": 0.2497100830078125, + "step": 784 + }, + { + "epoch": 0.05306205218331756, + "grad_norm": 1.3040817975997925, + "learning_rate": 2.997315946315641e-05, + "loss": 0.10846710205078125, + "step": 785 + }, + { + "epoch": 0.053129647154251725, + "grad_norm": 1.0497519969940186, + "learning_rate": 2.997296180290062e-05, + "loss": 0.09075927734375, + "step": 786 + }, + { + "epoch": 0.05319724212518589, + "grad_norm": 1.3192615509033203, + "learning_rate": 2.997276341815982e-05, + "loss": 0.1404266357421875, + "step": 787 + }, + { + "epoch": 0.05326483709612005, + "grad_norm": 2.337124824523926, + "learning_rate": 2.997256430894362e-05, + "loss": 0.213104248046875, + "step": 788 + }, + { + "epoch": 0.05333243206705421, + "grad_norm": 0.7429531216621399, + "learning_rate": 2.9972364475261634e-05, + "loss": 0.097991943359375, + "step": 789 + }, + { + "epoch": 0.05340002703798837, + "grad_norm": 1.060349702835083, + "learning_rate": 2.997216391712355e-05, + "loss": 0.1021270751953125, + "step": 790 + }, + { + "epoch": 0.053467622008922536, + "grad_norm": 0.5234419107437134, + "learning_rate": 2.9971962634539062e-05, + "loss": 0.09649658203125, + "step": 791 + }, + { + "epoch": 0.0535352169798567, + "grad_norm": 0.3470989763736725, + "learning_rate": 2.9971760627517915e-05, + "loss": 0.07071685791015625, + "step": 792 + }, + { + "epoch": 0.05360281195079086, + "grad_norm": 0.7459664940834045, + "learning_rate": 2.997155789606988e-05, + "loss": 0.04510498046875, + "step": 793 + }, + { + "epoch": 0.053670406921725027, + "grad_norm": 1.6446985006332397, + "learning_rate": 2.9971354440204764e-05, + "loss": 0.17181396484375, + "step": 794 + }, + { + "epoch": 0.05373800189265918, + "grad_norm": 2.4801266193389893, + "learning_rate": 2.9971150259932414e-05, + "loss": 0.23095703125, + "step": 795 + }, + { + "epoch": 0.05380559686359335, + "grad_norm": 0.6094555854797363, + "learning_rate": 2.997094535526271e-05, + "loss": 0.0838470458984375, + "step": 796 + }, + { + "epoch": 0.05387319183452751, + "grad_norm": 2.392026424407959, + "learning_rate": 2.997073972620557e-05, + "loss": 0.130279541015625, + "step": 797 + }, + { + "epoch": 0.053940786805461674, + "grad_norm": 0.6096291542053223, + "learning_rate": 2.9970533372770934e-05, + "loss": 0.069000244140625, + "step": 798 + }, + { + "epoch": 0.05400838177639584, + "grad_norm": 1.880600929260254, + "learning_rate": 2.99703262949688e-05, + "loss": 0.156707763671875, + "step": 799 + }, + { + "epoch": 0.05407597674733, + "grad_norm": 4.728000164031982, + "learning_rate": 2.9970118492809175e-05, + "loss": 0.18009567260742188, + "step": 800 + }, + { + "epoch": 0.05414357171826416, + "grad_norm": 2.9767110347747803, + "learning_rate": 2.996990996630212e-05, + "loss": 0.24395751953125, + "step": 801 + }, + { + "epoch": 0.05421116668919832, + "grad_norm": 2.442593812942505, + "learning_rate": 2.9969700715457728e-05, + "loss": 0.29327392578125, + "step": 802 + }, + { + "epoch": 0.054278761660132485, + "grad_norm": 1.5819250345230103, + "learning_rate": 2.9969490740286116e-05, + "loss": 0.1939544677734375, + "step": 803 + }, + { + "epoch": 0.05434635663106665, + "grad_norm": 2.858311891555786, + "learning_rate": 2.996928004079745e-05, + "loss": 0.100250244140625, + "step": 804 + }, + { + "epoch": 0.05441395160200081, + "grad_norm": 4.862689971923828, + "learning_rate": 2.996906861700192e-05, + "loss": 0.265960693359375, + "step": 805 + }, + { + "epoch": 0.054481546572934976, + "grad_norm": 1.9693169593811035, + "learning_rate": 2.996885646890976e-05, + "loss": 0.136260986328125, + "step": 806 + }, + { + "epoch": 0.05454914154386914, + "grad_norm": 0.6152768731117249, + "learning_rate": 2.9968643596531234e-05, + "loss": 0.12652587890625, + "step": 807 + }, + { + "epoch": 0.054616736514803296, + "grad_norm": 1.292212724685669, + "learning_rate": 2.996842999987664e-05, + "loss": 0.0840911865234375, + "step": 808 + }, + { + "epoch": 0.05468433148573746, + "grad_norm": 1.0400551557540894, + "learning_rate": 2.9968215678956315e-05, + "loss": 0.15380859375, + "step": 809 + }, + { + "epoch": 0.054751926456671623, + "grad_norm": 2.5345287322998047, + "learning_rate": 2.996800063378063e-05, + "loss": 0.1583404541015625, + "step": 810 + }, + { + "epoch": 0.05481952142760579, + "grad_norm": 1.0090272426605225, + "learning_rate": 2.996778486435999e-05, + "loss": 0.06238555908203125, + "step": 811 + }, + { + "epoch": 0.05488711639853995, + "grad_norm": 1.5094521045684814, + "learning_rate": 2.9967568370704837e-05, + "loss": 0.1192169189453125, + "step": 812 + }, + { + "epoch": 0.054954711369474114, + "grad_norm": 1.8224164247512817, + "learning_rate": 2.9967351152825638e-05, + "loss": 0.1237335205078125, + "step": 813 + }, + { + "epoch": 0.05502230634040827, + "grad_norm": 2.4984638690948486, + "learning_rate": 2.9967133210732917e-05, + "loss": 0.214019775390625, + "step": 814 + }, + { + "epoch": 0.055089901311342435, + "grad_norm": 2.7823970317840576, + "learning_rate": 2.9966914544437204e-05, + "loss": 0.141082763671875, + "step": 815 + }, + { + "epoch": 0.0551574962822766, + "grad_norm": 1.0938231945037842, + "learning_rate": 2.9966695153949087e-05, + "loss": 0.093505859375, + "step": 816 + }, + { + "epoch": 0.05522509125321076, + "grad_norm": 3.451517105102539, + "learning_rate": 2.9966475039279188e-05, + "loss": 0.1694488525390625, + "step": 817 + }, + { + "epoch": 0.055292686224144925, + "grad_norm": 0.383260577917099, + "learning_rate": 2.996625420043815e-05, + "loss": 0.0504302978515625, + "step": 818 + }, + { + "epoch": 0.05536028119507909, + "grad_norm": 1.290202260017395, + "learning_rate": 2.996603263743666e-05, + "loss": 0.09320068359375, + "step": 819 + }, + { + "epoch": 0.055427876166013246, + "grad_norm": 3.2454540729522705, + "learning_rate": 2.996581035028543e-05, + "loss": 0.1424102783203125, + "step": 820 + }, + { + "epoch": 0.05549547113694741, + "grad_norm": 2.513552665710449, + "learning_rate": 2.996558733899523e-05, + "loss": 0.0787811279296875, + "step": 821 + }, + { + "epoch": 0.05556306610788157, + "grad_norm": 2.425506114959717, + "learning_rate": 2.9965363603576845e-05, + "loss": 0.13590240478515625, + "step": 822 + }, + { + "epoch": 0.05563066107881574, + "grad_norm": 1.1194407939910889, + "learning_rate": 2.99651391440411e-05, + "loss": 0.172393798828125, + "step": 823 + }, + { + "epoch": 0.0556982560497499, + "grad_norm": 0.9336449503898621, + "learning_rate": 2.9964913960398855e-05, + "loss": 0.172393798828125, + "step": 824 + }, + { + "epoch": 0.055765851020684064, + "grad_norm": 0.8601374626159668, + "learning_rate": 2.996468805266101e-05, + "loss": 0.13332366943359375, + "step": 825 + }, + { + "epoch": 0.05583344599161822, + "grad_norm": 3.479795217514038, + "learning_rate": 2.996446142083849e-05, + "loss": 0.1258392333984375, + "step": 826 + }, + { + "epoch": 0.055901040962552384, + "grad_norm": 1.078012466430664, + "learning_rate": 2.996423406494226e-05, + "loss": 0.04630279541015625, + "step": 827 + }, + { + "epoch": 0.05596863593348655, + "grad_norm": 5.331472873687744, + "learning_rate": 2.996400598498333e-05, + "loss": 0.1746063232421875, + "step": 828 + }, + { + "epoch": 0.05603623090442071, + "grad_norm": 2.7642226219177246, + "learning_rate": 2.996377718097273e-05, + "loss": 0.18731689453125, + "step": 829 + }, + { + "epoch": 0.056103825875354875, + "grad_norm": 1.432850956916809, + "learning_rate": 2.9963547652921528e-05, + "loss": 0.1236572265625, + "step": 830 + }, + { + "epoch": 0.05617142084628904, + "grad_norm": 1.457736611366272, + "learning_rate": 2.9963317400840834e-05, + "loss": 0.13812255859375, + "step": 831 + }, + { + "epoch": 0.056239015817223195, + "grad_norm": 1.731471061706543, + "learning_rate": 2.9963086424741788e-05, + "loss": 0.167205810546875, + "step": 832 + }, + { + "epoch": 0.05630661078815736, + "grad_norm": 1.710494041442871, + "learning_rate": 2.9962854724635566e-05, + "loss": 0.1357879638671875, + "step": 833 + }, + { + "epoch": 0.05637420575909152, + "grad_norm": 0.42386457324028015, + "learning_rate": 2.9962622300533382e-05, + "loss": 0.06775665283203125, + "step": 834 + }, + { + "epoch": 0.056441800730025686, + "grad_norm": 0.3498987555503845, + "learning_rate": 2.996238915244648e-05, + "loss": 0.0518798828125, + "step": 835 + }, + { + "epoch": 0.05650939570095985, + "grad_norm": 0.3975345194339752, + "learning_rate": 2.9962155280386133e-05, + "loss": 0.076385498046875, + "step": 836 + }, + { + "epoch": 0.05657699067189401, + "grad_norm": 0.3868916928768158, + "learning_rate": 2.996192068436367e-05, + "loss": 0.0788726806640625, + "step": 837 + }, + { + "epoch": 0.05664458564282818, + "grad_norm": 1.539169192314148, + "learning_rate": 2.9961685364390444e-05, + "loss": 0.1669921875, + "step": 838 + }, + { + "epoch": 0.056712180613762334, + "grad_norm": 0.39753198623657227, + "learning_rate": 2.9961449320477828e-05, + "loss": 0.06993865966796875, + "step": 839 + }, + { + "epoch": 0.0567797755846965, + "grad_norm": 0.7295112609863281, + "learning_rate": 2.996121255263725e-05, + "loss": 0.07353973388671875, + "step": 840 + }, + { + "epoch": 0.05684737055563066, + "grad_norm": 2.83770489692688, + "learning_rate": 2.9960975060880163e-05, + "loss": 0.1655731201171875, + "step": 841 + }, + { + "epoch": 0.056914965526564824, + "grad_norm": 1.6561765670776367, + "learning_rate": 2.9960736845218065e-05, + "loss": 0.1276092529296875, + "step": 842 + }, + { + "epoch": 0.05698256049749899, + "grad_norm": 1.4960308074951172, + "learning_rate": 2.9960497905662478e-05, + "loss": 0.1047515869140625, + "step": 843 + }, + { + "epoch": 0.05705015546843315, + "grad_norm": 0.7189322710037231, + "learning_rate": 2.9960258242224962e-05, + "loss": 0.072235107421875, + "step": 844 + }, + { + "epoch": 0.05711775043936731, + "grad_norm": 0.5290546417236328, + "learning_rate": 2.9960017854917117e-05, + "loss": 0.0452423095703125, + "step": 845 + }, + { + "epoch": 0.05718534541030147, + "grad_norm": 3.078366279602051, + "learning_rate": 2.9959776743750573e-05, + "loss": 0.1862030029296875, + "step": 846 + }, + { + "epoch": 0.057252940381235636, + "grad_norm": 4.562908172607422, + "learning_rate": 2.9959534908736997e-05, + "loss": 0.168365478515625, + "step": 847 + }, + { + "epoch": 0.0573205353521698, + "grad_norm": 2.5665509700775146, + "learning_rate": 2.995929234988809e-05, + "loss": 0.1492919921875, + "step": 848 + }, + { + "epoch": 0.05738813032310396, + "grad_norm": 7.058149337768555, + "learning_rate": 2.9959049067215584e-05, + "loss": 0.31707763671875, + "step": 849 + }, + { + "epoch": 0.057455725294038126, + "grad_norm": 3.4122705459594727, + "learning_rate": 2.9958805060731258e-05, + "loss": 0.30743408203125, + "step": 850 + }, + { + "epoch": 0.05752332026497228, + "grad_norm": 1.0625618696212769, + "learning_rate": 2.9958560330446918e-05, + "loss": 0.18010711669921875, + "step": 851 + }, + { + "epoch": 0.05759091523590645, + "grad_norm": 0.9888483881950378, + "learning_rate": 2.9958314876374398e-05, + "loss": 0.0882568359375, + "step": 852 + }, + { + "epoch": 0.05765851020684061, + "grad_norm": 1.577160358428955, + "learning_rate": 2.9958068698525584e-05, + "loss": 0.0811614990234375, + "step": 853 + }, + { + "epoch": 0.057726105177774774, + "grad_norm": 9.544565200805664, + "learning_rate": 2.995782179691238e-05, + "loss": 0.393463134765625, + "step": 854 + }, + { + "epoch": 0.05779370014870894, + "grad_norm": 4.729395389556885, + "learning_rate": 2.9957574171546738e-05, + "loss": 0.21783447265625, + "step": 855 + }, + { + "epoch": 0.0578612951196431, + "grad_norm": 0.5957499742507935, + "learning_rate": 2.9957325822440633e-05, + "loss": 0.06842041015625, + "step": 856 + }, + { + "epoch": 0.05792889009057726, + "grad_norm": 0.3147412836551666, + "learning_rate": 2.995707674960609e-05, + "loss": 0.0429534912109375, + "step": 857 + }, + { + "epoch": 0.05799648506151142, + "grad_norm": 0.9388167858123779, + "learning_rate": 2.995682695305516e-05, + "loss": 0.096588134765625, + "step": 858 + }, + { + "epoch": 0.058064080032445585, + "grad_norm": 1.4326372146606445, + "learning_rate": 2.995657643279992e-05, + "loss": 0.05584716796875, + "step": 859 + }, + { + "epoch": 0.05813167500337975, + "grad_norm": 0.6919702291488647, + "learning_rate": 2.99563251888525e-05, + "loss": 0.101470947265625, + "step": 860 + }, + { + "epoch": 0.05819926997431391, + "grad_norm": 1.0722358226776123, + "learning_rate": 2.9956073221225055e-05, + "loss": 0.053386688232421875, + "step": 861 + }, + { + "epoch": 0.058266864945248076, + "grad_norm": 2.926563024520874, + "learning_rate": 2.995582052992978e-05, + "loss": 0.229827880859375, + "step": 862 + }, + { + "epoch": 0.05833445991618224, + "grad_norm": 6.733220100402832, + "learning_rate": 2.9955567114978893e-05, + "loss": 0.26611328125, + "step": 863 + }, + { + "epoch": 0.058402054887116396, + "grad_norm": 1.5382096767425537, + "learning_rate": 2.9955312976384664e-05, + "loss": 0.215850830078125, + "step": 864 + }, + { + "epoch": 0.05846964985805056, + "grad_norm": 0.7605106234550476, + "learning_rate": 2.995505811415939e-05, + "loss": 0.1740875244140625, + "step": 865 + }, + { + "epoch": 0.05853724482898472, + "grad_norm": 0.6929376125335693, + "learning_rate": 2.9954802528315393e-05, + "loss": 0.11006546020507812, + "step": 866 + }, + { + "epoch": 0.05860483979991889, + "grad_norm": 1.3323006629943848, + "learning_rate": 2.9954546218865052e-05, + "loss": 0.23101806640625, + "step": 867 + }, + { + "epoch": 0.05867243477085305, + "grad_norm": 3.1335277557373047, + "learning_rate": 2.9954289185820762e-05, + "loss": 0.26544189453125, + "step": 868 + }, + { + "epoch": 0.058740029741787214, + "grad_norm": 1.484539270401001, + "learning_rate": 2.9954031429194965e-05, + "loss": 0.2216339111328125, + "step": 869 + }, + { + "epoch": 0.05880762471272137, + "grad_norm": 1.9327133893966675, + "learning_rate": 2.9953772949000123e-05, + "loss": 0.2845458984375, + "step": 870 + }, + { + "epoch": 0.058875219683655534, + "grad_norm": 1.3728270530700684, + "learning_rate": 2.9953513745248755e-05, + "loss": 0.212890625, + "step": 871 + }, + { + "epoch": 0.0589428146545897, + "grad_norm": 2.0589420795440674, + "learning_rate": 2.9953253817953393e-05, + "loss": 0.16388702392578125, + "step": 872 + }, + { + "epoch": 0.05901040962552386, + "grad_norm": 1.0501657724380493, + "learning_rate": 2.995299316712662e-05, + "loss": 0.216522216796875, + "step": 873 + }, + { + "epoch": 0.059078004596458025, + "grad_norm": 0.1925058662891388, + "learning_rate": 2.9952731792781046e-05, + "loss": 0.030162811279296875, + "step": 874 + }, + { + "epoch": 0.05914559956739219, + "grad_norm": 2.209242582321167, + "learning_rate": 2.9952469694929317e-05, + "loss": 0.13317108154296875, + "step": 875 + }, + { + "epoch": 0.059213194538326346, + "grad_norm": 0.5595496296882629, + "learning_rate": 2.9952206873584117e-05, + "loss": 0.096771240234375, + "step": 876 + }, + { + "epoch": 0.05928078950926051, + "grad_norm": 1.8298194408416748, + "learning_rate": 2.995194332875816e-05, + "loss": 0.191619873046875, + "step": 877 + }, + { + "epoch": 0.05934838448019467, + "grad_norm": 1.379042625427246, + "learning_rate": 2.9951679060464203e-05, + "loss": 0.160980224609375, + "step": 878 + }, + { + "epoch": 0.059415979451128836, + "grad_norm": 1.3410826921463013, + "learning_rate": 2.9951414068715027e-05, + "loss": 0.2058868408203125, + "step": 879 + }, + { + "epoch": 0.059483574422063, + "grad_norm": 2.6695737838745117, + "learning_rate": 2.9951148353523454e-05, + "loss": 0.09976959228515625, + "step": 880 + }, + { + "epoch": 0.059551169392997164, + "grad_norm": 0.6443442702293396, + "learning_rate": 2.9950881914902346e-05, + "loss": 0.0819091796875, + "step": 881 + }, + { + "epoch": 0.05961876436393132, + "grad_norm": 0.7715091109275818, + "learning_rate": 2.9950614752864594e-05, + "loss": 0.183868408203125, + "step": 882 + }, + { + "epoch": 0.059686359334865484, + "grad_norm": 0.5205076932907104, + "learning_rate": 2.9950346867423124e-05, + "loss": 0.04035186767578125, + "step": 883 + }, + { + "epoch": 0.05975395430579965, + "grad_norm": 2.6942296028137207, + "learning_rate": 2.9950078258590895e-05, + "loss": 0.13458251953125, + "step": 884 + }, + { + "epoch": 0.05982154927673381, + "grad_norm": 3.1313858032226562, + "learning_rate": 2.9949808926380908e-05, + "loss": 0.24163818359375, + "step": 885 + }, + { + "epoch": 0.059889144247667975, + "grad_norm": 0.6924409866333008, + "learning_rate": 2.994953887080619e-05, + "loss": 0.0685882568359375, + "step": 886 + }, + { + "epoch": 0.05995673921860214, + "grad_norm": 2.5856430530548096, + "learning_rate": 2.994926809187981e-05, + "loss": 0.268463134765625, + "step": 887 + }, + { + "epoch": 0.0600243341895363, + "grad_norm": 3.2327497005462646, + "learning_rate": 2.9948996589614874e-05, + "loss": 0.2279815673828125, + "step": 888 + }, + { + "epoch": 0.06009192916047046, + "grad_norm": 1.5033053159713745, + "learning_rate": 2.9948724364024517e-05, + "loss": 0.28533935546875, + "step": 889 + }, + { + "epoch": 0.06015952413140462, + "grad_norm": 1.2745696306228638, + "learning_rate": 2.994845141512191e-05, + "loss": 0.126434326171875, + "step": 890 + }, + { + "epoch": 0.060227119102338786, + "grad_norm": 0.6408318877220154, + "learning_rate": 2.994817774292026e-05, + "loss": 0.0765228271484375, + "step": 891 + }, + { + "epoch": 0.06029471407327295, + "grad_norm": 0.7295949459075928, + "learning_rate": 2.994790334743281e-05, + "loss": 0.09851837158203125, + "step": 892 + }, + { + "epoch": 0.06036230904420711, + "grad_norm": 2.16662335395813, + "learning_rate": 2.9947628228672832e-05, + "loss": 0.18022918701171875, + "step": 893 + }, + { + "epoch": 0.06042990401514128, + "grad_norm": 2.1763756275177, + "learning_rate": 2.9947352386653646e-05, + "loss": 0.26983642578125, + "step": 894 + }, + { + "epoch": 0.06049749898607543, + "grad_norm": 1.033920168876648, + "learning_rate": 2.9947075821388593e-05, + "loss": 0.183349609375, + "step": 895 + }, + { + "epoch": 0.0605650939570096, + "grad_norm": 0.6783666610717773, + "learning_rate": 2.9946798532891057e-05, + "loss": 0.12408447265625, + "step": 896 + }, + { + "epoch": 0.06063268892794376, + "grad_norm": 0.5905393362045288, + "learning_rate": 2.9946520521174456e-05, + "loss": 0.09354400634765625, + "step": 897 + }, + { + "epoch": 0.060700283898877924, + "grad_norm": 3.2582030296325684, + "learning_rate": 2.9946241786252238e-05, + "loss": 0.2055816650390625, + "step": 898 + }, + { + "epoch": 0.06076787886981209, + "grad_norm": 0.6703091859817505, + "learning_rate": 2.9945962328137898e-05, + "loss": 0.20318603515625, + "step": 899 + }, + { + "epoch": 0.06083547384074625, + "grad_norm": 1.229465365409851, + "learning_rate": 2.994568214684495e-05, + "loss": 0.12152862548828125, + "step": 900 + }, + { + "epoch": 0.06090306881168041, + "grad_norm": 1.2551441192626953, + "learning_rate": 2.9945401242386954e-05, + "loss": 0.06800079345703125, + "step": 901 + }, + { + "epoch": 0.06097066378261457, + "grad_norm": 4.011671543121338, + "learning_rate": 2.99451196147775e-05, + "loss": 0.28704833984375, + "step": 902 + }, + { + "epoch": 0.061038258753548735, + "grad_norm": 0.4648917019367218, + "learning_rate": 2.994483726403022e-05, + "loss": 0.06903076171875, + "step": 903 + }, + { + "epoch": 0.0611058537244829, + "grad_norm": 1.7956534624099731, + "learning_rate": 2.9944554190158763e-05, + "loss": 0.182708740234375, + "step": 904 + }, + { + "epoch": 0.06117344869541706, + "grad_norm": 4.59787130355835, + "learning_rate": 2.9944270393176843e-05, + "loss": 0.234893798828125, + "step": 905 + }, + { + "epoch": 0.061241043666351226, + "grad_norm": 0.9047979116439819, + "learning_rate": 2.9943985873098183e-05, + "loss": 0.08118438720703125, + "step": 906 + }, + { + "epoch": 0.06130863863728538, + "grad_norm": 0.4286149740219116, + "learning_rate": 2.994370062993655e-05, + "loss": 0.0568084716796875, + "step": 907 + }, + { + "epoch": 0.061376233608219546, + "grad_norm": 0.7139745354652405, + "learning_rate": 2.9943414663705744e-05, + "loss": 0.09337234497070312, + "step": 908 + }, + { + "epoch": 0.06144382857915371, + "grad_norm": 1.1404082775115967, + "learning_rate": 2.994312797441961e-05, + "loss": 0.111419677734375, + "step": 909 + }, + { + "epoch": 0.061511423550087874, + "grad_norm": 0.519210696220398, + "learning_rate": 2.9942840562092013e-05, + "loss": 0.052570343017578125, + "step": 910 + }, + { + "epoch": 0.06157901852102204, + "grad_norm": 1.011319875717163, + "learning_rate": 2.9942552426736855e-05, + "loss": 0.14933013916015625, + "step": 911 + }, + { + "epoch": 0.0616466134919562, + "grad_norm": 0.954541802406311, + "learning_rate": 2.994226356836809e-05, + "loss": 0.1376800537109375, + "step": 912 + }, + { + "epoch": 0.06171420846289036, + "grad_norm": 2.0333518981933594, + "learning_rate": 2.9941973986999685e-05, + "loss": 0.119476318359375, + "step": 913 + }, + { + "epoch": 0.06178180343382452, + "grad_norm": 1.629976749420166, + "learning_rate": 2.9941683682645657e-05, + "loss": 0.20188140869140625, + "step": 914 + }, + { + "epoch": 0.061849398404758685, + "grad_norm": 0.820031464099884, + "learning_rate": 2.9941392655320053e-05, + "loss": 0.173797607421875, + "step": 915 + }, + { + "epoch": 0.06191699337569285, + "grad_norm": 1.9376895427703857, + "learning_rate": 2.9941100905036954e-05, + "loss": 0.2962646484375, + "step": 916 + }, + { + "epoch": 0.06198458834662701, + "grad_norm": 0.7108121514320374, + "learning_rate": 2.994080843181047e-05, + "loss": 0.0908050537109375, + "step": 917 + }, + { + "epoch": 0.062052183317561176, + "grad_norm": 2.0119616985321045, + "learning_rate": 2.994051523565476e-05, + "loss": 0.1720123291015625, + "step": 918 + }, + { + "epoch": 0.06211977828849534, + "grad_norm": 0.8919369578361511, + "learning_rate": 2.9940221316584015e-05, + "loss": 0.161651611328125, + "step": 919 + }, + { + "epoch": 0.062187373259429496, + "grad_norm": 2.271965265274048, + "learning_rate": 2.9939926674612437e-05, + "loss": 0.26678466796875, + "step": 920 + }, + { + "epoch": 0.06225496823036366, + "grad_norm": 1.2467728853225708, + "learning_rate": 2.9939631309754306e-05, + "loss": 0.10289764404296875, + "step": 921 + }, + { + "epoch": 0.06232256320129782, + "grad_norm": 3.075059413909912, + "learning_rate": 2.9939335222023902e-05, + "loss": 0.26123046875, + "step": 922 + }, + { + "epoch": 0.06239015817223199, + "grad_norm": 0.7037169337272644, + "learning_rate": 2.9939038411435554e-05, + "loss": 0.0881500244140625, + "step": 923 + }, + { + "epoch": 0.06245775314316615, + "grad_norm": 0.3032863736152649, + "learning_rate": 2.993874087800362e-05, + "loss": 0.067626953125, + "step": 924 + }, + { + "epoch": 0.06252534811410031, + "grad_norm": 0.9380422830581665, + "learning_rate": 2.9938442621742505e-05, + "loss": 0.201629638671875, + "step": 925 + }, + { + "epoch": 0.06259294308503448, + "grad_norm": 0.5217441916465759, + "learning_rate": 2.993814364266663e-05, + "loss": 0.0508270263671875, + "step": 926 + }, + { + "epoch": 0.06266053805596863, + "grad_norm": 3.9842731952667236, + "learning_rate": 2.9937843940790466e-05, + "loss": 0.2939453125, + "step": 927 + }, + { + "epoch": 0.0627281330269028, + "grad_norm": 2.396908760070801, + "learning_rate": 2.993754351612852e-05, + "loss": 0.19091796875, + "step": 928 + }, + { + "epoch": 0.06279572799783696, + "grad_norm": 1.32743239402771, + "learning_rate": 2.9937242368695316e-05, + "loss": 0.1046142578125, + "step": 929 + }, + { + "epoch": 0.06286332296877112, + "grad_norm": 1.4905118942260742, + "learning_rate": 2.9936940498505436e-05, + "loss": 0.1589508056640625, + "step": 930 + }, + { + "epoch": 0.06293091793970529, + "grad_norm": 0.5056895017623901, + "learning_rate": 2.9936637905573483e-05, + "loss": 0.08492279052734375, + "step": 931 + }, + { + "epoch": 0.06299851291063945, + "grad_norm": 0.5784094929695129, + "learning_rate": 2.9936334589914097e-05, + "loss": 0.06884765625, + "step": 932 + }, + { + "epoch": 0.06306610788157362, + "grad_norm": 1.221081018447876, + "learning_rate": 2.9936030551541958e-05, + "loss": 0.1058807373046875, + "step": 933 + }, + { + "epoch": 0.06313370285250777, + "grad_norm": 2.9747819900512695, + "learning_rate": 2.993572579047177e-05, + "loss": 0.151580810546875, + "step": 934 + }, + { + "epoch": 0.06320129782344193, + "grad_norm": 1.7097184658050537, + "learning_rate": 2.9935420306718287e-05, + "loss": 0.1144256591796875, + "step": 935 + }, + { + "epoch": 0.0632688927943761, + "grad_norm": 1.9121861457824707, + "learning_rate": 2.9935114100296286e-05, + "loss": 0.1721649169921875, + "step": 936 + }, + { + "epoch": 0.06333648776531026, + "grad_norm": 1.5236896276474, + "learning_rate": 2.9934807171220584e-05, + "loss": 0.1590118408203125, + "step": 937 + }, + { + "epoch": 0.06340408273624443, + "grad_norm": 0.9815553426742554, + "learning_rate": 2.9934499519506035e-05, + "loss": 0.16290283203125, + "step": 938 + }, + { + "epoch": 0.06347167770717858, + "grad_norm": 0.4308045208454132, + "learning_rate": 2.9934191145167523e-05, + "loss": 0.036121368408203125, + "step": 939 + }, + { + "epoch": 0.06353927267811275, + "grad_norm": 2.487091302871704, + "learning_rate": 2.9933882048219965e-05, + "loss": 0.20343017578125, + "step": 940 + }, + { + "epoch": 0.06360686764904691, + "grad_norm": 0.4010871648788452, + "learning_rate": 2.9933572228678324e-05, + "loss": 0.06662750244140625, + "step": 941 + }, + { + "epoch": 0.06367446261998107, + "grad_norm": 2.488725185394287, + "learning_rate": 2.9933261686557585e-05, + "loss": 0.24774169921875, + "step": 942 + }, + { + "epoch": 0.06374205759091524, + "grad_norm": 0.6642182469367981, + "learning_rate": 2.993295042187278e-05, + "loss": 0.11006927490234375, + "step": 943 + }, + { + "epoch": 0.0638096525618494, + "grad_norm": 1.117730736732483, + "learning_rate": 2.9932638434638964e-05, + "loss": 0.1292266845703125, + "step": 944 + }, + { + "epoch": 0.06387724753278357, + "grad_norm": 4.4256696701049805, + "learning_rate": 2.9932325724871236e-05, + "loss": 0.317901611328125, + "step": 945 + }, + { + "epoch": 0.06394484250371772, + "grad_norm": 0.7872300148010254, + "learning_rate": 2.9932012292584726e-05, + "loss": 0.0988922119140625, + "step": 946 + }, + { + "epoch": 0.06401243747465189, + "grad_norm": 1.0284101963043213, + "learning_rate": 2.9931698137794603e-05, + "loss": 0.19512939453125, + "step": 947 + }, + { + "epoch": 0.06408003244558605, + "grad_norm": 1.0618596076965332, + "learning_rate": 2.993138326051606e-05, + "loss": 0.216094970703125, + "step": 948 + }, + { + "epoch": 0.0641476274165202, + "grad_norm": 0.9642961025238037, + "learning_rate": 2.993106766076434e-05, + "loss": 0.23114013671875, + "step": 949 + }, + { + "epoch": 0.06421522238745438, + "grad_norm": 0.5454681515693665, + "learning_rate": 2.993075133855471e-05, + "loss": 0.0394134521484375, + "step": 950 + }, + { + "epoch": 0.06428281735838853, + "grad_norm": 1.5127077102661133, + "learning_rate": 2.993043429390248e-05, + "loss": 0.238555908203125, + "step": 951 + }, + { + "epoch": 0.0643504123293227, + "grad_norm": 3.2921745777130127, + "learning_rate": 2.9930116526822987e-05, + "loss": 0.2462158203125, + "step": 952 + }, + { + "epoch": 0.06441800730025686, + "grad_norm": 1.9759520292282104, + "learning_rate": 2.9929798037331602e-05, + "loss": 0.23211669921875, + "step": 953 + }, + { + "epoch": 0.06448560227119102, + "grad_norm": 3.8454272747039795, + "learning_rate": 2.9929478825443743e-05, + "loss": 0.24908447265625, + "step": 954 + }, + { + "epoch": 0.06455319724212519, + "grad_norm": 1.6233363151550293, + "learning_rate": 2.9929158891174856e-05, + "loss": 0.2196044921875, + "step": 955 + }, + { + "epoch": 0.06462079221305934, + "grad_norm": 2.476318120956421, + "learning_rate": 2.9928838234540416e-05, + "loss": 0.22528076171875, + "step": 956 + }, + { + "epoch": 0.06468838718399351, + "grad_norm": 1.4296510219573975, + "learning_rate": 2.9928516855555942e-05, + "loss": 0.203460693359375, + "step": 957 + }, + { + "epoch": 0.06475598215492767, + "grad_norm": 0.6637486815452576, + "learning_rate": 2.992819475423698e-05, + "loss": 0.05867767333984375, + "step": 958 + }, + { + "epoch": 0.06482357712586184, + "grad_norm": 1.1480967998504639, + "learning_rate": 2.9927871930599123e-05, + "loss": 0.108642578125, + "step": 959 + }, + { + "epoch": 0.064891172096796, + "grad_norm": 0.5949143767356873, + "learning_rate": 2.9927548384657986e-05, + "loss": 0.07279205322265625, + "step": 960 + }, + { + "epoch": 0.06495876706773016, + "grad_norm": 2.5029382705688477, + "learning_rate": 2.992722411642922e-05, + "loss": 0.218414306640625, + "step": 961 + }, + { + "epoch": 0.06502636203866433, + "grad_norm": 0.41039636731147766, + "learning_rate": 2.9926899125928524e-05, + "loss": 0.10589599609375, + "step": 962 + }, + { + "epoch": 0.06509395700959848, + "grad_norm": 3.4498698711395264, + "learning_rate": 2.9926573413171618e-05, + "loss": 0.263702392578125, + "step": 963 + }, + { + "epoch": 0.06516155198053265, + "grad_norm": 3.5646474361419678, + "learning_rate": 2.992624697817426e-05, + "loss": 0.216827392578125, + "step": 964 + }, + { + "epoch": 0.06522914695146681, + "grad_norm": 0.6922155022621155, + "learning_rate": 2.992591982095225e-05, + "loss": 0.13646697998046875, + "step": 965 + }, + { + "epoch": 0.06529674192240097, + "grad_norm": 1.3411462306976318, + "learning_rate": 2.992559194152142e-05, + "loss": 0.20214080810546875, + "step": 966 + }, + { + "epoch": 0.06536433689333514, + "grad_norm": 0.29705294966697693, + "learning_rate": 2.9925263339897623e-05, + "loss": 0.04436492919921875, + "step": 967 + }, + { + "epoch": 0.0654319318642693, + "grad_norm": 0.34629586338996887, + "learning_rate": 2.9924934016096775e-05, + "loss": 0.0415802001953125, + "step": 968 + }, + { + "epoch": 0.06549952683520346, + "grad_norm": 0.9472656846046448, + "learning_rate": 2.9924603970134793e-05, + "loss": 0.08475494384765625, + "step": 969 + }, + { + "epoch": 0.06556712180613762, + "grad_norm": 1.3625463247299194, + "learning_rate": 2.9924273202027665e-05, + "loss": 0.14196014404296875, + "step": 970 + }, + { + "epoch": 0.06563471677707179, + "grad_norm": 0.8600717782974243, + "learning_rate": 2.9923941711791376e-05, + "loss": 0.067169189453125, + "step": 971 + }, + { + "epoch": 0.06570231174800595, + "grad_norm": 0.9851216673851013, + "learning_rate": 2.9923609499441983e-05, + "loss": 0.159942626953125, + "step": 972 + }, + { + "epoch": 0.0657699067189401, + "grad_norm": 3.9642441272735596, + "learning_rate": 2.9923276564995553e-05, + "loss": 0.323455810546875, + "step": 973 + }, + { + "epoch": 0.06583750168987428, + "grad_norm": 0.4195259213447571, + "learning_rate": 2.9922942908468195e-05, + "loss": 0.055171966552734375, + "step": 974 + }, + { + "epoch": 0.06590509666080843, + "grad_norm": 1.2925944328308105, + "learning_rate": 2.9922608529876052e-05, + "loss": 0.13216400146484375, + "step": 975 + }, + { + "epoch": 0.0659726916317426, + "grad_norm": 0.4830281436443329, + "learning_rate": 2.992227342923531e-05, + "loss": 0.06310653686523438, + "step": 976 + }, + { + "epoch": 0.06604028660267676, + "grad_norm": 1.2209314107894897, + "learning_rate": 2.9921937606562177e-05, + "loss": 0.13620758056640625, + "step": 977 + }, + { + "epoch": 0.06610788157361093, + "grad_norm": 1.265149712562561, + "learning_rate": 2.99216010618729e-05, + "loss": 0.194732666015625, + "step": 978 + }, + { + "epoch": 0.06617547654454509, + "grad_norm": 0.6316042542457581, + "learning_rate": 2.9921263795183773e-05, + "loss": 0.06876373291015625, + "step": 979 + }, + { + "epoch": 0.06624307151547924, + "grad_norm": 1.8511770963668823, + "learning_rate": 2.992092580651111e-05, + "loss": 0.24432373046875, + "step": 980 + }, + { + "epoch": 0.06631066648641341, + "grad_norm": 1.3869339227676392, + "learning_rate": 2.9920587095871262e-05, + "loss": 0.09899139404296875, + "step": 981 + }, + { + "epoch": 0.06637826145734757, + "grad_norm": 2.2255873680114746, + "learning_rate": 2.9920247663280615e-05, + "loss": 0.2391357421875, + "step": 982 + }, + { + "epoch": 0.06644585642828174, + "grad_norm": 1.0579551458358765, + "learning_rate": 2.9919907508755605e-05, + "loss": 0.13504791259765625, + "step": 983 + }, + { + "epoch": 0.0665134513992159, + "grad_norm": 1.8259472846984863, + "learning_rate": 2.991956663231268e-05, + "loss": 0.263885498046875, + "step": 984 + }, + { + "epoch": 0.06658104637015005, + "grad_norm": 2.5853912830352783, + "learning_rate": 2.9919225033968344e-05, + "loss": 0.217193603515625, + "step": 985 + }, + { + "epoch": 0.06664864134108422, + "grad_norm": 4.6347270011901855, + "learning_rate": 2.9918882713739113e-05, + "loss": 0.2469329833984375, + "step": 986 + }, + { + "epoch": 0.06671623631201838, + "grad_norm": 2.27030611038208, + "learning_rate": 2.9918539671641553e-05, + "loss": 0.14069366455078125, + "step": 987 + }, + { + "epoch": 0.06678383128295255, + "grad_norm": 4.7777628898620605, + "learning_rate": 2.991819590769227e-05, + "loss": 0.2301788330078125, + "step": 988 + }, + { + "epoch": 0.06685142625388671, + "grad_norm": 0.5740934014320374, + "learning_rate": 2.99178514219079e-05, + "loss": 0.09119033813476562, + "step": 989 + }, + { + "epoch": 0.06691902122482088, + "grad_norm": 1.1940112113952637, + "learning_rate": 2.9917506214305098e-05, + "loss": 0.1667022705078125, + "step": 990 + }, + { + "epoch": 0.06698661619575504, + "grad_norm": 1.064487338066101, + "learning_rate": 2.9917160284900575e-05, + "loss": 0.240875244140625, + "step": 991 + }, + { + "epoch": 0.06705421116668919, + "grad_norm": 1.808379888534546, + "learning_rate": 2.991681363371107e-05, + "loss": 0.047821044921875, + "step": 992 + }, + { + "epoch": 0.06712180613762336, + "grad_norm": 1.7435846328735352, + "learning_rate": 2.9916466260753358e-05, + "loss": 0.0933837890625, + "step": 993 + }, + { + "epoch": 0.06718940110855752, + "grad_norm": 0.4546066224575043, + "learning_rate": 2.991611816604424e-05, + "loss": 0.030384063720703125, + "step": 994 + }, + { + "epoch": 0.06725699607949169, + "grad_norm": 1.086093544960022, + "learning_rate": 2.9915769349600565e-05, + "loss": 0.08676910400390625, + "step": 995 + }, + { + "epoch": 0.06732459105042585, + "grad_norm": 1.7769551277160645, + "learning_rate": 2.9915419811439207e-05, + "loss": 0.1594085693359375, + "step": 996 + }, + { + "epoch": 0.06739218602136002, + "grad_norm": 1.591798186302185, + "learning_rate": 2.991506955157708e-05, + "loss": 0.1211090087890625, + "step": 997 + }, + { + "epoch": 0.06745978099229417, + "grad_norm": 0.3632519543170929, + "learning_rate": 2.991471857003113e-05, + "loss": 0.045665740966796875, + "step": 998 + }, + { + "epoch": 0.06752737596322833, + "grad_norm": 1.1140685081481934, + "learning_rate": 2.991436686681835e-05, + "loss": 0.14583587646484375, + "step": 999 + }, + { + "epoch": 0.0675949709341625, + "grad_norm": 1.5290324687957764, + "learning_rate": 2.991401444195574e-05, + "loss": 0.23516845703125, + "step": 1000 + }, + { + "epoch": 0.06766256590509666, + "grad_norm": 0.6099919676780701, + "learning_rate": 2.9913661295460367e-05, + "loss": 0.12030029296875, + "step": 1001 + }, + { + "epoch": 0.06773016087603083, + "grad_norm": 7.517384052276611, + "learning_rate": 2.9913307427349314e-05, + "loss": 0.3355712890625, + "step": 1002 + }, + { + "epoch": 0.06779775584696499, + "grad_norm": 1.513765573501587, + "learning_rate": 2.99129528376397e-05, + "loss": 0.203399658203125, + "step": 1003 + }, + { + "epoch": 0.06786535081789914, + "grad_norm": 0.9769092202186584, + "learning_rate": 2.9912597526348686e-05, + "loss": 0.09569168090820312, + "step": 1004 + }, + { + "epoch": 0.06793294578883331, + "grad_norm": 2.4454801082611084, + "learning_rate": 2.9912241493493467e-05, + "loss": 0.26531982421875, + "step": 1005 + }, + { + "epoch": 0.06800054075976747, + "grad_norm": 2.0424702167510986, + "learning_rate": 2.991188473909126e-05, + "loss": 0.1972198486328125, + "step": 1006 + }, + { + "epoch": 0.06806813573070164, + "grad_norm": 0.9653903245925903, + "learning_rate": 2.9911527263159337e-05, + "loss": 0.24395751953125, + "step": 1007 + }, + { + "epoch": 0.0681357307016358, + "grad_norm": 1.9239991903305054, + "learning_rate": 2.9911169065714992e-05, + "loss": 0.172271728515625, + "step": 1008 + }, + { + "epoch": 0.06820332567256997, + "grad_norm": 1.4577077627182007, + "learning_rate": 2.9910810146775555e-05, + "loss": 0.23358154296875, + "step": 1009 + }, + { + "epoch": 0.06827092064350412, + "grad_norm": 1.164260745048523, + "learning_rate": 2.991045050635839e-05, + "loss": 0.1011962890625, + "step": 1010 + }, + { + "epoch": 0.06833851561443828, + "grad_norm": 0.2223067283630371, + "learning_rate": 2.991009014448091e-05, + "loss": 0.04528045654296875, + "step": 1011 + }, + { + "epoch": 0.06840611058537245, + "grad_norm": 1.222305417060852, + "learning_rate": 2.990972906116054e-05, + "loss": 0.133270263671875, + "step": 1012 + }, + { + "epoch": 0.06847370555630661, + "grad_norm": 2.887559652328491, + "learning_rate": 2.9909367256414754e-05, + "loss": 0.233428955078125, + "step": 1013 + }, + { + "epoch": 0.06854130052724078, + "grad_norm": 2.6124536991119385, + "learning_rate": 2.9909004730261062e-05, + "loss": 0.14493179321289062, + "step": 1014 + }, + { + "epoch": 0.06860889549817493, + "grad_norm": 0.6863389611244202, + "learning_rate": 2.9908641482717e-05, + "loss": 0.049457550048828125, + "step": 1015 + }, + { + "epoch": 0.06867649046910909, + "grad_norm": 2.1674764156341553, + "learning_rate": 2.990827751380015e-05, + "loss": 0.18416595458984375, + "step": 1016 + }, + { + "epoch": 0.06874408544004326, + "grad_norm": 1.01522958278656, + "learning_rate": 2.990791282352812e-05, + "loss": 0.09918212890625, + "step": 1017 + }, + { + "epoch": 0.06881168041097742, + "grad_norm": 0.8445401787757874, + "learning_rate": 2.9907547411918556e-05, + "loss": 0.0784912109375, + "step": 1018 + }, + { + "epoch": 0.06887927538191159, + "grad_norm": 2.38413405418396, + "learning_rate": 2.9907181278989138e-05, + "loss": 0.182647705078125, + "step": 1019 + }, + { + "epoch": 0.06894687035284575, + "grad_norm": 0.5141571164131165, + "learning_rate": 2.9906814424757585e-05, + "loss": 0.109100341796875, + "step": 1020 + }, + { + "epoch": 0.06901446532377992, + "grad_norm": 0.9956170916557312, + "learning_rate": 2.9906446849241648e-05, + "loss": 0.21087646484375, + "step": 1021 + }, + { + "epoch": 0.06908206029471407, + "grad_norm": 0.7577962875366211, + "learning_rate": 2.99060785524591e-05, + "loss": 0.061855316162109375, + "step": 1022 + }, + { + "epoch": 0.06914965526564823, + "grad_norm": 1.460848093032837, + "learning_rate": 2.9905709534427783e-05, + "loss": 0.07253265380859375, + "step": 1023 + }, + { + "epoch": 0.0692172502365824, + "grad_norm": 0.989648699760437, + "learning_rate": 2.9905339795165535e-05, + "loss": 0.19488525390625, + "step": 1024 + }, + { + "epoch": 0.06928484520751656, + "grad_norm": 0.6458337903022766, + "learning_rate": 2.9904969334690254e-05, + "loss": 0.04381561279296875, + "step": 1025 + }, + { + "epoch": 0.06935244017845073, + "grad_norm": 2.398688793182373, + "learning_rate": 2.9904598153019866e-05, + "loss": 0.2125244140625, + "step": 1026 + }, + { + "epoch": 0.06942003514938488, + "grad_norm": 4.483644962310791, + "learning_rate": 2.9904226250172325e-05, + "loss": 0.3067626953125, + "step": 1027 + }, + { + "epoch": 0.06948763012031905, + "grad_norm": 0.43189990520477295, + "learning_rate": 2.990385362616563e-05, + "loss": 0.04752349853515625, + "step": 1028 + }, + { + "epoch": 0.06955522509125321, + "grad_norm": 0.6409412026405334, + "learning_rate": 2.9903480281017815e-05, + "loss": 0.136138916015625, + "step": 1029 + }, + { + "epoch": 0.06962282006218737, + "grad_norm": 0.7920196652412415, + "learning_rate": 2.9903106214746936e-05, + "loss": 0.1298370361328125, + "step": 1030 + }, + { + "epoch": 0.06969041503312154, + "grad_norm": 1.2256666421890259, + "learning_rate": 2.9902731427371096e-05, + "loss": 0.17897796630859375, + "step": 1031 + }, + { + "epoch": 0.0697580100040557, + "grad_norm": 1.385477900505066, + "learning_rate": 2.990235591890843e-05, + "loss": 0.141754150390625, + "step": 1032 + }, + { + "epoch": 0.06982560497498987, + "grad_norm": 0.5798199772834778, + "learning_rate": 2.9901979689377112e-05, + "loss": 0.1222076416015625, + "step": 1033 + }, + { + "epoch": 0.06989319994592402, + "grad_norm": 1.0417646169662476, + "learning_rate": 2.990160273879534e-05, + "loss": 0.156280517578125, + "step": 1034 + }, + { + "epoch": 0.06996079491685818, + "grad_norm": 0.4576219618320465, + "learning_rate": 2.9901225067181357e-05, + "loss": 0.112335205078125, + "step": 1035 + }, + { + "epoch": 0.07002838988779235, + "grad_norm": 1.8268146514892578, + "learning_rate": 2.9900846674553433e-05, + "loss": 0.0878753662109375, + "step": 1036 + }, + { + "epoch": 0.0700959848587265, + "grad_norm": 0.6079979538917542, + "learning_rate": 2.990046756092988e-05, + "loss": 0.06288909912109375, + "step": 1037 + }, + { + "epoch": 0.07016357982966068, + "grad_norm": 0.39236193895339966, + "learning_rate": 2.9900087726329044e-05, + "loss": 0.08774566650390625, + "step": 1038 + }, + { + "epoch": 0.07023117480059483, + "grad_norm": 1.935506820678711, + "learning_rate": 2.98997071707693e-05, + "loss": 0.29913330078125, + "step": 1039 + }, + { + "epoch": 0.070298769771529, + "grad_norm": 0.7288399934768677, + "learning_rate": 2.989932589426906e-05, + "loss": 0.1938018798828125, + "step": 1040 + }, + { + "epoch": 0.07036636474246316, + "grad_norm": 0.4447568953037262, + "learning_rate": 2.9898943896846776e-05, + "loss": 0.0840911865234375, + "step": 1041 + }, + { + "epoch": 0.07043395971339732, + "grad_norm": 0.803397536277771, + "learning_rate": 2.989856117852093e-05, + "loss": 0.0999908447265625, + "step": 1042 + }, + { + "epoch": 0.07050155468433149, + "grad_norm": 1.5831501483917236, + "learning_rate": 2.989817773931005e-05, + "loss": 0.3109130859375, + "step": 1043 + }, + { + "epoch": 0.07056914965526564, + "grad_norm": 1.722156047821045, + "learning_rate": 2.989779357923267e-05, + "loss": 0.1553192138671875, + "step": 1044 + }, + { + "epoch": 0.07063674462619982, + "grad_norm": 2.8358781337738037, + "learning_rate": 2.989740869830739e-05, + "loss": 0.2078704833984375, + "step": 1045 + }, + { + "epoch": 0.07070433959713397, + "grad_norm": 1.668404459953308, + "learning_rate": 2.9897023096552837e-05, + "loss": 0.137542724609375, + "step": 1046 + }, + { + "epoch": 0.07077193456806813, + "grad_norm": 0.5044867396354675, + "learning_rate": 2.9896636773987658e-05, + "loss": 0.06797027587890625, + "step": 1047 + }, + { + "epoch": 0.0708395295390023, + "grad_norm": 0.9633924961090088, + "learning_rate": 2.9896249730630546e-05, + "loss": 0.07401275634765625, + "step": 1048 + }, + { + "epoch": 0.07090712450993646, + "grad_norm": 0.4688151776790619, + "learning_rate": 2.9895861966500242e-05, + "loss": 0.1023406982421875, + "step": 1049 + }, + { + "epoch": 0.07097471948087063, + "grad_norm": 1.0222822427749634, + "learning_rate": 2.9895473481615495e-05, + "loss": 0.22796630859375, + "step": 1050 + }, + { + "epoch": 0.07104231445180478, + "grad_norm": 0.957935631275177, + "learning_rate": 2.989508427599511e-05, + "loss": 0.177001953125, + "step": 1051 + }, + { + "epoch": 0.07110990942273895, + "grad_norm": 0.5727894902229309, + "learning_rate": 2.9894694349657915e-05, + "loss": 0.09510040283203125, + "step": 1052 + }, + { + "epoch": 0.07117750439367311, + "grad_norm": 1.690813660621643, + "learning_rate": 2.9894303702622775e-05, + "loss": 0.229888916015625, + "step": 1053 + }, + { + "epoch": 0.07124509936460727, + "grad_norm": 0.5275159478187561, + "learning_rate": 2.98939123349086e-05, + "loss": 0.074371337890625, + "step": 1054 + }, + { + "epoch": 0.07131269433554144, + "grad_norm": 1.432361125946045, + "learning_rate": 2.989352024653432e-05, + "loss": 0.16253662109375, + "step": 1055 + }, + { + "epoch": 0.0713802893064756, + "grad_norm": 0.944343626499176, + "learning_rate": 2.989312743751891e-05, + "loss": 0.0979461669921875, + "step": 1056 + }, + { + "epoch": 0.07144788427740976, + "grad_norm": 1.2682995796203613, + "learning_rate": 2.9892733907881375e-05, + "loss": 0.1595458984375, + "step": 1057 + }, + { + "epoch": 0.07151547924834392, + "grad_norm": 1.7946405410766602, + "learning_rate": 2.9892339657640753e-05, + "loss": 0.1036224365234375, + "step": 1058 + }, + { + "epoch": 0.07158307421927809, + "grad_norm": 1.7684751749038696, + "learning_rate": 2.9891944686816124e-05, + "loss": 0.20550537109375, + "step": 1059 + }, + { + "epoch": 0.07165066919021225, + "grad_norm": 1.4723124504089355, + "learning_rate": 2.9891548995426606e-05, + "loss": 0.17644500732421875, + "step": 1060 + }, + { + "epoch": 0.0717182641611464, + "grad_norm": 1.2342793941497803, + "learning_rate": 2.9891152583491332e-05, + "loss": 0.1331787109375, + "step": 1061 + }, + { + "epoch": 0.07178585913208058, + "grad_norm": 0.43215256929397583, + "learning_rate": 2.9890755451029488e-05, + "loss": 0.06599807739257812, + "step": 1062 + }, + { + "epoch": 0.07185345410301473, + "grad_norm": 0.3989132046699524, + "learning_rate": 2.9890357598060298e-05, + "loss": 0.040569305419921875, + "step": 1063 + }, + { + "epoch": 0.0719210490739489, + "grad_norm": 2.817308187484741, + "learning_rate": 2.9889959024602998e-05, + "loss": 0.19484710693359375, + "step": 1064 + }, + { + "epoch": 0.07198864404488306, + "grad_norm": 2.866114854812622, + "learning_rate": 2.9889559730676882e-05, + "loss": 0.240325927734375, + "step": 1065 + }, + { + "epoch": 0.07205623901581722, + "grad_norm": 1.1625744104385376, + "learning_rate": 2.9889159716301272e-05, + "loss": 0.1686859130859375, + "step": 1066 + }, + { + "epoch": 0.07212383398675139, + "grad_norm": 1.8676786422729492, + "learning_rate": 2.9888758981495517e-05, + "loss": 0.1173095703125, + "step": 1067 + }, + { + "epoch": 0.07219142895768554, + "grad_norm": 0.9458216428756714, + "learning_rate": 2.9888357526279008e-05, + "loss": 0.14310455322265625, + "step": 1068 + }, + { + "epoch": 0.07225902392861971, + "grad_norm": 0.8536631464958191, + "learning_rate": 2.988795535067118e-05, + "loss": 0.1098480224609375, + "step": 1069 + }, + { + "epoch": 0.07232661889955387, + "grad_norm": 1.1740399599075317, + "learning_rate": 2.988755245469148e-05, + "loss": 0.0645751953125, + "step": 1070 + }, + { + "epoch": 0.07239421387048804, + "grad_norm": 2.171377420425415, + "learning_rate": 2.9887148838359406e-05, + "loss": 0.156036376953125, + "step": 1071 + }, + { + "epoch": 0.0724618088414222, + "grad_norm": 3.1952130794525146, + "learning_rate": 2.9886744501694494e-05, + "loss": 0.1913909912109375, + "step": 1072 + }, + { + "epoch": 0.07252940381235636, + "grad_norm": 1.9059171676635742, + "learning_rate": 2.98863394447163e-05, + "loss": 0.210601806640625, + "step": 1073 + }, + { + "epoch": 0.07259699878329053, + "grad_norm": 1.8129764795303345, + "learning_rate": 2.9885933667444424e-05, + "loss": 0.21263885498046875, + "step": 1074 + }, + { + "epoch": 0.07266459375422468, + "grad_norm": 0.9295524954795837, + "learning_rate": 2.9885527169898506e-05, + "loss": 0.1925506591796875, + "step": 1075 + }, + { + "epoch": 0.07273218872515885, + "grad_norm": 0.7174832224845886, + "learning_rate": 2.9885119952098214e-05, + "loss": 0.1388702392578125, + "step": 1076 + }, + { + "epoch": 0.07279978369609301, + "grad_norm": 1.4660228490829468, + "learning_rate": 2.9884712014063246e-05, + "loss": 0.22479248046875, + "step": 1077 + }, + { + "epoch": 0.07286737866702718, + "grad_norm": 0.85959792137146, + "learning_rate": 2.9884303355813343e-05, + "loss": 0.07480621337890625, + "step": 1078 + }, + { + "epoch": 0.07293497363796134, + "grad_norm": 0.4652743637561798, + "learning_rate": 2.988389397736828e-05, + "loss": 0.09132003784179688, + "step": 1079 + }, + { + "epoch": 0.0730025686088955, + "grad_norm": 1.106212854385376, + "learning_rate": 2.9883483878747863e-05, + "loss": 0.141387939453125, + "step": 1080 + }, + { + "epoch": 0.07307016357982966, + "grad_norm": 1.0157514810562134, + "learning_rate": 2.988307305997194e-05, + "loss": 0.15012359619140625, + "step": 1081 + }, + { + "epoch": 0.07313775855076382, + "grad_norm": 0.746908962726593, + "learning_rate": 2.9882661521060382e-05, + "loss": 0.06589508056640625, + "step": 1082 + }, + { + "epoch": 0.07320535352169799, + "grad_norm": 2.068856716156006, + "learning_rate": 2.988224926203311e-05, + "loss": 0.201995849609375, + "step": 1083 + }, + { + "epoch": 0.07327294849263215, + "grad_norm": 1.7000885009765625, + "learning_rate": 2.9881836282910062e-05, + "loss": 0.14667129516601562, + "step": 1084 + }, + { + "epoch": 0.0733405434635663, + "grad_norm": 0.825502872467041, + "learning_rate": 2.988142258371123e-05, + "loss": 0.0987701416015625, + "step": 1085 + }, + { + "epoch": 0.07340813843450048, + "grad_norm": 2.550720453262329, + "learning_rate": 2.9881008164456623e-05, + "loss": 0.21038818359375, + "step": 1086 + }, + { + "epoch": 0.07347573340543463, + "grad_norm": 3.6872010231018066, + "learning_rate": 2.9880593025166298e-05, + "loss": 0.220367431640625, + "step": 1087 + }, + { + "epoch": 0.0735433283763688, + "grad_norm": 1.7688634395599365, + "learning_rate": 2.9880177165860343e-05, + "loss": 0.184661865234375, + "step": 1088 + }, + { + "epoch": 0.07361092334730296, + "grad_norm": 1.8804919719696045, + "learning_rate": 2.9879760586558876e-05, + "loss": 0.123687744140625, + "step": 1089 + }, + { + "epoch": 0.07367851831823713, + "grad_norm": 0.6761595010757446, + "learning_rate": 2.9879343287282054e-05, + "loss": 0.04827880859375, + "step": 1090 + }, + { + "epoch": 0.07374611328917129, + "grad_norm": 0.17635345458984375, + "learning_rate": 2.9878925268050072e-05, + "loss": 0.03223419189453125, + "step": 1091 + }, + { + "epoch": 0.07381370826010544, + "grad_norm": 1.0309463739395142, + "learning_rate": 2.9878506528883152e-05, + "loss": 0.1659698486328125, + "step": 1092 + }, + { + "epoch": 0.07388130323103961, + "grad_norm": 0.5245548486709595, + "learning_rate": 2.987808706980156e-05, + "loss": 0.0825653076171875, + "step": 1093 + }, + { + "epoch": 0.07394889820197377, + "grad_norm": 0.7278925776481628, + "learning_rate": 2.987766689082559e-05, + "loss": 0.129730224609375, + "step": 1094 + }, + { + "epoch": 0.07401649317290794, + "grad_norm": 0.5789534449577332, + "learning_rate": 2.9877245991975574e-05, + "loss": 0.1372222900390625, + "step": 1095 + }, + { + "epoch": 0.0740840881438421, + "grad_norm": 1.3314894437789917, + "learning_rate": 2.9876824373271872e-05, + "loss": 0.224151611328125, + "step": 1096 + }, + { + "epoch": 0.07415168311477625, + "grad_norm": 1.325044870376587, + "learning_rate": 2.9876402034734893e-05, + "loss": 0.2032470703125, + "step": 1097 + }, + { + "epoch": 0.07421927808571042, + "grad_norm": 0.7208755612373352, + "learning_rate": 2.987597897638507e-05, + "loss": 0.0456390380859375, + "step": 1098 + }, + { + "epoch": 0.07428687305664458, + "grad_norm": 1.057134747505188, + "learning_rate": 2.9875555198242867e-05, + "loss": 0.239898681640625, + "step": 1099 + }, + { + "epoch": 0.07435446802757875, + "grad_norm": 2.2850184440612793, + "learning_rate": 2.9875130700328796e-05, + "loss": 0.19134521484375, + "step": 1100 + }, + { + "epoch": 0.07442206299851291, + "grad_norm": 1.7901805639266968, + "learning_rate": 2.987470548266339e-05, + "loss": 0.2049102783203125, + "step": 1101 + }, + { + "epoch": 0.07448965796944708, + "grad_norm": 2.2583000659942627, + "learning_rate": 2.9874279545267233e-05, + "loss": 0.18509674072265625, + "step": 1102 + }, + { + "epoch": 0.07455725294038124, + "grad_norm": 1.1797682046890259, + "learning_rate": 2.9873852888160924e-05, + "loss": 0.14544677734375, + "step": 1103 + }, + { + "epoch": 0.07462484791131539, + "grad_norm": 0.97647625207901, + "learning_rate": 2.9873425511365116e-05, + "loss": 0.10546875, + "step": 1104 + }, + { + "epoch": 0.07469244288224956, + "grad_norm": 2.153688669204712, + "learning_rate": 2.9872997414900487e-05, + "loss": 0.2220458984375, + "step": 1105 + }, + { + "epoch": 0.07476003785318372, + "grad_norm": 0.9320057034492493, + "learning_rate": 2.9872568598787748e-05, + "loss": 0.1506500244140625, + "step": 1106 + }, + { + "epoch": 0.07482763282411789, + "grad_norm": 0.6520230770111084, + "learning_rate": 2.9872139063047645e-05, + "loss": 0.11602783203125, + "step": 1107 + }, + { + "epoch": 0.07489522779505205, + "grad_norm": 0.697313129901886, + "learning_rate": 2.9871708807700968e-05, + "loss": 0.09395599365234375, + "step": 1108 + }, + { + "epoch": 0.07496282276598622, + "grad_norm": 2.8352925777435303, + "learning_rate": 2.9871277832768533e-05, + "loss": 0.2349853515625, + "step": 1109 + }, + { + "epoch": 0.07503041773692037, + "grad_norm": 1.2986407279968262, + "learning_rate": 2.987084613827119e-05, + "loss": 0.0577239990234375, + "step": 1110 + }, + { + "epoch": 0.07509801270785453, + "grad_norm": 0.7026776671409607, + "learning_rate": 2.9870413724229836e-05, + "loss": 0.106201171875, + "step": 1111 + }, + { + "epoch": 0.0751656076787887, + "grad_norm": 2.0141124725341797, + "learning_rate": 2.9869980590665384e-05, + "loss": 0.207244873046875, + "step": 1112 + }, + { + "epoch": 0.07523320264972286, + "grad_norm": 1.0627650022506714, + "learning_rate": 2.9869546737598796e-05, + "loss": 0.147796630859375, + "step": 1113 + }, + { + "epoch": 0.07530079762065703, + "grad_norm": 0.5204144716262817, + "learning_rate": 2.9869112165051063e-05, + "loss": 0.026706695556640625, + "step": 1114 + }, + { + "epoch": 0.07536839259159119, + "grad_norm": 0.5319059491157532, + "learning_rate": 2.9868676873043217e-05, + "loss": 0.05999755859375, + "step": 1115 + }, + { + "epoch": 0.07543598756252534, + "grad_norm": 4.669980525970459, + "learning_rate": 2.9868240861596313e-05, + "loss": 0.25579833984375, + "step": 1116 + }, + { + "epoch": 0.07550358253345951, + "grad_norm": 0.57204669713974, + "learning_rate": 2.986780413073145e-05, + "loss": 0.067626953125, + "step": 1117 + }, + { + "epoch": 0.07557117750439367, + "grad_norm": 0.44288432598114014, + "learning_rate": 2.9867366680469768e-05, + "loss": 0.06453704833984375, + "step": 1118 + }, + { + "epoch": 0.07563877247532784, + "grad_norm": 0.20165890455245972, + "learning_rate": 2.986692851083242e-05, + "loss": 0.03050994873046875, + "step": 1119 + }, + { + "epoch": 0.075706367446262, + "grad_norm": 1.8367241621017456, + "learning_rate": 2.986648962184062e-05, + "loss": 0.1052703857421875, + "step": 1120 + }, + { + "epoch": 0.07577396241719617, + "grad_norm": 1.6155513525009155, + "learning_rate": 2.986605001351559e-05, + "loss": 0.229949951171875, + "step": 1121 + }, + { + "epoch": 0.07584155738813032, + "grad_norm": 1.4357457160949707, + "learning_rate": 2.986560968587862e-05, + "loss": 0.21038818359375, + "step": 1122 + }, + { + "epoch": 0.07590915235906448, + "grad_norm": 0.6332769989967346, + "learning_rate": 2.9865168638951e-05, + "loss": 0.09059906005859375, + "step": 1123 + }, + { + "epoch": 0.07597674732999865, + "grad_norm": 4.848388195037842, + "learning_rate": 2.9864726872754075e-05, + "loss": 0.290252685546875, + "step": 1124 + }, + { + "epoch": 0.07604434230093281, + "grad_norm": 1.9156436920166016, + "learning_rate": 2.986428438730922e-05, + "loss": 0.2567138671875, + "step": 1125 + }, + { + "epoch": 0.07611193727186698, + "grad_norm": 0.7738792896270752, + "learning_rate": 2.9863841182637846e-05, + "loss": 0.0891571044921875, + "step": 1126 + }, + { + "epoch": 0.07617953224280113, + "grad_norm": 0.5510578751564026, + "learning_rate": 2.98633972587614e-05, + "loss": 0.1310272216796875, + "step": 1127 + }, + { + "epoch": 0.07624712721373529, + "grad_norm": 1.212103009223938, + "learning_rate": 2.9862952615701365e-05, + "loss": 0.1902923583984375, + "step": 1128 + }, + { + "epoch": 0.07631472218466946, + "grad_norm": 0.9745903611183167, + "learning_rate": 2.9862507253479244e-05, + "loss": 0.0775604248046875, + "step": 1129 + }, + { + "epoch": 0.07638231715560362, + "grad_norm": 1.6042511463165283, + "learning_rate": 2.9862061172116593e-05, + "loss": 0.1813812255859375, + "step": 1130 + }, + { + "epoch": 0.07644991212653779, + "grad_norm": 1.8843108415603638, + "learning_rate": 2.9861614371635e-05, + "loss": 0.169464111328125, + "step": 1131 + }, + { + "epoch": 0.07651750709747195, + "grad_norm": 0.6696853637695312, + "learning_rate": 2.986116685205608e-05, + "loss": 0.06440353393554688, + "step": 1132 + }, + { + "epoch": 0.07658510206840612, + "grad_norm": 0.8460825681686401, + "learning_rate": 2.9860718613401487e-05, + "loss": 0.1351470947265625, + "step": 1133 + }, + { + "epoch": 0.07665269703934027, + "grad_norm": 0.8472217917442322, + "learning_rate": 2.9860269655692912e-05, + "loss": 0.164825439453125, + "step": 1134 + }, + { + "epoch": 0.07672029201027443, + "grad_norm": 1.5148669481277466, + "learning_rate": 2.985981997895207e-05, + "loss": 0.1175994873046875, + "step": 1135 + }, + { + "epoch": 0.0767878869812086, + "grad_norm": 0.30249154567718506, + "learning_rate": 2.985936958320073e-05, + "loss": 0.06206512451171875, + "step": 1136 + }, + { + "epoch": 0.07685548195214276, + "grad_norm": 0.936980128288269, + "learning_rate": 2.9858918468460678e-05, + "loss": 0.18817138671875, + "step": 1137 + }, + { + "epoch": 0.07692307692307693, + "grad_norm": 2.2543318271636963, + "learning_rate": 2.9858466634753744e-05, + "loss": 0.232086181640625, + "step": 1138 + }, + { + "epoch": 0.07699067189401108, + "grad_norm": 2.2292051315307617, + "learning_rate": 2.985801408210179e-05, + "loss": 0.13602447509765625, + "step": 1139 + }, + { + "epoch": 0.07705826686494525, + "grad_norm": 0.7231391072273254, + "learning_rate": 2.9857560810526712e-05, + "loss": 0.111968994140625, + "step": 1140 + }, + { + "epoch": 0.07712586183587941, + "grad_norm": 2.773320436477661, + "learning_rate": 2.9857106820050447e-05, + "loss": 0.2261810302734375, + "step": 1141 + }, + { + "epoch": 0.07719345680681357, + "grad_norm": 2.9671144485473633, + "learning_rate": 2.985665211069496e-05, + "loss": 0.2227325439453125, + "step": 1142 + }, + { + "epoch": 0.07726105177774774, + "grad_norm": 2.8645756244659424, + "learning_rate": 2.985619668248225e-05, + "loss": 0.2114105224609375, + "step": 1143 + }, + { + "epoch": 0.0773286467486819, + "grad_norm": 1.3915486335754395, + "learning_rate": 2.985574053543435e-05, + "loss": 0.084716796875, + "step": 1144 + }, + { + "epoch": 0.07739624171961607, + "grad_norm": 1.2810064554214478, + "learning_rate": 2.985528366957334e-05, + "loss": 0.08847808837890625, + "step": 1145 + }, + { + "epoch": 0.07746383669055022, + "grad_norm": 0.6093810200691223, + "learning_rate": 2.9854826084921326e-05, + "loss": 0.12129974365234375, + "step": 1146 + }, + { + "epoch": 0.07753143166148438, + "grad_norm": 1.3651700019836426, + "learning_rate": 2.9854367781500437e-05, + "loss": 0.21319580078125, + "step": 1147 + }, + { + "epoch": 0.07759902663241855, + "grad_norm": 1.5841312408447266, + "learning_rate": 2.985390875933286e-05, + "loss": 0.1655120849609375, + "step": 1148 + }, + { + "epoch": 0.0776666216033527, + "grad_norm": 1.9463224411010742, + "learning_rate": 2.9853449018440807e-05, + "loss": 0.216217041015625, + "step": 1149 + }, + { + "epoch": 0.07773421657428688, + "grad_norm": 1.0441999435424805, + "learning_rate": 2.985298855884651e-05, + "loss": 0.09476470947265625, + "step": 1150 + }, + { + "epoch": 0.07780181154522103, + "grad_norm": 1.052998661994934, + "learning_rate": 2.985252738057226e-05, + "loss": 0.09526824951171875, + "step": 1151 + }, + { + "epoch": 0.0778694065161552, + "grad_norm": 0.36954882740974426, + "learning_rate": 2.9852065483640366e-05, + "loss": 0.07309722900390625, + "step": 1152 + }, + { + "epoch": 0.07793700148708936, + "grad_norm": 0.5255449414253235, + "learning_rate": 2.9851602868073187e-05, + "loss": 0.130126953125, + "step": 1153 + }, + { + "epoch": 0.07800459645802352, + "grad_norm": 1.4370874166488647, + "learning_rate": 2.9851139533893093e-05, + "loss": 0.1557464599609375, + "step": 1154 + }, + { + "epoch": 0.07807219142895769, + "grad_norm": 2.069315195083618, + "learning_rate": 2.9850675481122514e-05, + "loss": 0.11279296875, + "step": 1155 + }, + { + "epoch": 0.07813978639989184, + "grad_norm": 2.9388182163238525, + "learning_rate": 2.9850210709783898e-05, + "loss": 0.19264984130859375, + "step": 1156 + }, + { + "epoch": 0.07820738137082602, + "grad_norm": 0.8841016292572021, + "learning_rate": 2.9849745219899734e-05, + "loss": 0.07791900634765625, + "step": 1157 + }, + { + "epoch": 0.07827497634176017, + "grad_norm": 2.545728921890259, + "learning_rate": 2.9849279011492548e-05, + "loss": 0.241424560546875, + "step": 1158 + }, + { + "epoch": 0.07834257131269434, + "grad_norm": 1.306354284286499, + "learning_rate": 2.9848812084584897e-05, + "loss": 0.15478515625, + "step": 1159 + }, + { + "epoch": 0.0784101662836285, + "grad_norm": 1.4798916578292847, + "learning_rate": 2.9848344439199374e-05, + "loss": 0.157318115234375, + "step": 1160 + }, + { + "epoch": 0.07847776125456266, + "grad_norm": 1.6816705465316772, + "learning_rate": 2.9847876075358604e-05, + "loss": 0.1540985107421875, + "step": 1161 + }, + { + "epoch": 0.07854535622549683, + "grad_norm": 2.2920281887054443, + "learning_rate": 2.9847406993085254e-05, + "loss": 0.14324951171875, + "step": 1162 + }, + { + "epoch": 0.07861295119643098, + "grad_norm": 1.312567949295044, + "learning_rate": 2.9846937192402018e-05, + "loss": 0.1348114013671875, + "step": 1163 + }, + { + "epoch": 0.07868054616736515, + "grad_norm": 0.9436526298522949, + "learning_rate": 2.984646667333163e-05, + "loss": 0.0865936279296875, + "step": 1164 + }, + { + "epoch": 0.07874814113829931, + "grad_norm": 1.6099838018417358, + "learning_rate": 2.984599543589685e-05, + "loss": 0.2811279296875, + "step": 1165 + }, + { + "epoch": 0.07881573610923347, + "grad_norm": 0.8434215188026428, + "learning_rate": 2.9845523480120487e-05, + "loss": 0.108917236328125, + "step": 1166 + }, + { + "epoch": 0.07888333108016764, + "grad_norm": 1.9045214653015137, + "learning_rate": 2.984505080602538e-05, + "loss": 0.3519287109375, + "step": 1167 + }, + { + "epoch": 0.0789509260511018, + "grad_norm": 1.3648487329483032, + "learning_rate": 2.984457741363439e-05, + "loss": 0.253326416015625, + "step": 1168 + }, + { + "epoch": 0.07901852102203596, + "grad_norm": 1.0085936784744263, + "learning_rate": 2.984410330297043e-05, + "loss": 0.1895751953125, + "step": 1169 + }, + { + "epoch": 0.07908611599297012, + "grad_norm": 1.8587989807128906, + "learning_rate": 2.9843628474056436e-05, + "loss": 0.171630859375, + "step": 1170 + }, + { + "epoch": 0.07915371096390429, + "grad_norm": 2.3033738136291504, + "learning_rate": 2.9843152926915382e-05, + "loss": 0.1949920654296875, + "step": 1171 + }, + { + "epoch": 0.07922130593483845, + "grad_norm": 1.5926214456558228, + "learning_rate": 2.984267666157028e-05, + "loss": 0.196380615234375, + "step": 1172 + }, + { + "epoch": 0.0792889009057726, + "grad_norm": 0.27894309163093567, + "learning_rate": 2.984219967804418e-05, + "loss": 0.039821624755859375, + "step": 1173 + }, + { + "epoch": 0.07935649587670678, + "grad_norm": 1.1988084316253662, + "learning_rate": 2.9841721976360154e-05, + "loss": 0.100860595703125, + "step": 1174 + }, + { + "epoch": 0.07942409084764093, + "grad_norm": 2.713534116744995, + "learning_rate": 2.9841243556541323e-05, + "loss": 0.2498779296875, + "step": 1175 + }, + { + "epoch": 0.0794916858185751, + "grad_norm": 1.3334455490112305, + "learning_rate": 2.9840764418610827e-05, + "loss": 0.138641357421875, + "step": 1176 + }, + { + "epoch": 0.07955928078950926, + "grad_norm": 0.4509168565273285, + "learning_rate": 2.9840284562591863e-05, + "loss": 0.073394775390625, + "step": 1177 + }, + { + "epoch": 0.07962687576044342, + "grad_norm": 1.1543267965316772, + "learning_rate": 2.9839803988507636e-05, + "loss": 0.19441604614257812, + "step": 1178 + }, + { + "epoch": 0.07969447073137759, + "grad_norm": 3.631807327270508, + "learning_rate": 2.9839322696381403e-05, + "loss": 0.1956634521484375, + "step": 1179 + }, + { + "epoch": 0.07976206570231174, + "grad_norm": 1.3238742351531982, + "learning_rate": 2.983884068623645e-05, + "loss": 0.084075927734375, + "step": 1180 + }, + { + "epoch": 0.07982966067324591, + "grad_norm": 2.667915105819702, + "learning_rate": 2.983835795809611e-05, + "loss": 0.198394775390625, + "step": 1181 + }, + { + "epoch": 0.07989725564418007, + "grad_norm": 0.2754450738430023, + "learning_rate": 2.983787451198373e-05, + "loss": 0.035572052001953125, + "step": 1182 + }, + { + "epoch": 0.07996485061511424, + "grad_norm": 0.25378119945526123, + "learning_rate": 2.9837390347922702e-05, + "loss": 0.0335845947265625, + "step": 1183 + }, + { + "epoch": 0.0800324455860484, + "grad_norm": 0.37249138951301575, + "learning_rate": 2.9836905465936462e-05, + "loss": 0.055171966552734375, + "step": 1184 + }, + { + "epoch": 0.08010004055698255, + "grad_norm": 1.0911989212036133, + "learning_rate": 2.983641986604846e-05, + "loss": 0.172637939453125, + "step": 1185 + }, + { + "epoch": 0.08016763552791673, + "grad_norm": 0.3192862570285797, + "learning_rate": 2.9835933548282204e-05, + "loss": 0.040374755859375, + "step": 1186 + }, + { + "epoch": 0.08023523049885088, + "grad_norm": 1.9144175052642822, + "learning_rate": 2.9835446512661218e-05, + "loss": 0.1856842041015625, + "step": 1187 + }, + { + "epoch": 0.08030282546978505, + "grad_norm": 0.5168705582618713, + "learning_rate": 2.9834958759209068e-05, + "loss": 0.09439849853515625, + "step": 1188 + }, + { + "epoch": 0.08037042044071921, + "grad_norm": 0.6051884293556213, + "learning_rate": 2.9834470287949357e-05, + "loss": 0.0686492919921875, + "step": 1189 + }, + { + "epoch": 0.08043801541165338, + "grad_norm": 0.7489222288131714, + "learning_rate": 2.9833981098905717e-05, + "loss": 0.06623077392578125, + "step": 1190 + }, + { + "epoch": 0.08050561038258754, + "grad_norm": 0.3584953546524048, + "learning_rate": 2.9833491192101818e-05, + "loss": 0.0873260498046875, + "step": 1191 + }, + { + "epoch": 0.08057320535352169, + "grad_norm": 0.72623610496521, + "learning_rate": 2.983300056756137e-05, + "loss": 0.07576751708984375, + "step": 1192 + }, + { + "epoch": 0.08064080032445586, + "grad_norm": 0.4888897240161896, + "learning_rate": 2.9832509225308105e-05, + "loss": 0.04840850830078125, + "step": 1193 + }, + { + "epoch": 0.08070839529539002, + "grad_norm": 0.7676663398742676, + "learning_rate": 2.9832017165365807e-05, + "loss": 0.164093017578125, + "step": 1194 + }, + { + "epoch": 0.08077599026632419, + "grad_norm": 0.9035090804100037, + "learning_rate": 2.9831524387758276e-05, + "loss": 0.1685638427734375, + "step": 1195 + }, + { + "epoch": 0.08084358523725835, + "grad_norm": 1.1012446880340576, + "learning_rate": 2.983103089250936e-05, + "loss": 0.06503677368164062, + "step": 1196 + }, + { + "epoch": 0.0809111802081925, + "grad_norm": 1.5298057794570923, + "learning_rate": 2.9830536679642937e-05, + "loss": 0.23175048828125, + "step": 1197 + }, + { + "epoch": 0.08097877517912667, + "grad_norm": 0.6964017748832703, + "learning_rate": 2.9830041749182918e-05, + "loss": 0.0401458740234375, + "step": 1198 + }, + { + "epoch": 0.08104637015006083, + "grad_norm": 0.26712894439697266, + "learning_rate": 2.9829546101153253e-05, + "loss": 0.0361480712890625, + "step": 1199 + }, + { + "epoch": 0.081113965120995, + "grad_norm": 1.6895982027053833, + "learning_rate": 2.9829049735577922e-05, + "loss": 0.1802520751953125, + "step": 1200 + }, + { + "epoch": 0.08118156009192916, + "grad_norm": 1.197412133216858, + "learning_rate": 2.9828552652480944e-05, + "loss": 0.214263916015625, + "step": 1201 + }, + { + "epoch": 0.08124915506286333, + "grad_norm": 0.931955873966217, + "learning_rate": 2.9828054851886372e-05, + "loss": 0.1197509765625, + "step": 1202 + }, + { + "epoch": 0.08131675003379749, + "grad_norm": 1.060117483139038, + "learning_rate": 2.9827556333818292e-05, + "loss": 0.104827880859375, + "step": 1203 + }, + { + "epoch": 0.08138434500473164, + "grad_norm": 0.6410755515098572, + "learning_rate": 2.982705709830082e-05, + "loss": 0.04161834716796875, + "step": 1204 + }, + { + "epoch": 0.08145193997566581, + "grad_norm": 0.3262999951839447, + "learning_rate": 2.9826557145358124e-05, + "loss": 0.02729034423828125, + "step": 1205 + }, + { + "epoch": 0.08151953494659997, + "grad_norm": 1.7476229667663574, + "learning_rate": 2.9826056475014385e-05, + "loss": 0.1936187744140625, + "step": 1206 + }, + { + "epoch": 0.08158712991753414, + "grad_norm": 0.9835536479949951, + "learning_rate": 2.982555508729383e-05, + "loss": 0.10500335693359375, + "step": 1207 + }, + { + "epoch": 0.0816547248884683, + "grad_norm": 0.7231082916259766, + "learning_rate": 2.9825052982220722e-05, + "loss": 0.0952911376953125, + "step": 1208 + }, + { + "epoch": 0.08172231985940245, + "grad_norm": 3.8267221450805664, + "learning_rate": 2.9824550159819358e-05, + "loss": 0.213623046875, + "step": 1209 + }, + { + "epoch": 0.08178991483033662, + "grad_norm": 0.5793026089668274, + "learning_rate": 2.9824046620114064e-05, + "loss": 0.12824249267578125, + "step": 1210 + }, + { + "epoch": 0.08185750980127078, + "grad_norm": 0.6305358409881592, + "learning_rate": 2.9823542363129203e-05, + "loss": 0.10318374633789062, + "step": 1211 + }, + { + "epoch": 0.08192510477220495, + "grad_norm": 1.3787438869476318, + "learning_rate": 2.982303738888917e-05, + "loss": 0.1243133544921875, + "step": 1212 + }, + { + "epoch": 0.08199269974313911, + "grad_norm": 0.6129947304725647, + "learning_rate": 2.9822531697418414e-05, + "loss": 0.1573028564453125, + "step": 1213 + }, + { + "epoch": 0.08206029471407328, + "grad_norm": 1.1993378400802612, + "learning_rate": 2.9822025288741387e-05, + "loss": 0.1661529541015625, + "step": 1214 + }, + { + "epoch": 0.08212788968500744, + "grad_norm": 1.5600917339324951, + "learning_rate": 2.98215181628826e-05, + "loss": 0.23504638671875, + "step": 1215 + }, + { + "epoch": 0.08219548465594159, + "grad_norm": 0.5006536245346069, + "learning_rate": 2.9821010319866595e-05, + "loss": 0.05230712890625, + "step": 1216 + }, + { + "epoch": 0.08226307962687576, + "grad_norm": 0.5122115612030029, + "learning_rate": 2.9820501759717938e-05, + "loss": 0.1075897216796875, + "step": 1217 + }, + { + "epoch": 0.08233067459780992, + "grad_norm": 1.7273048162460327, + "learning_rate": 2.9819992482461236e-05, + "loss": 0.176513671875, + "step": 1218 + }, + { + "epoch": 0.08239826956874409, + "grad_norm": 0.4893619418144226, + "learning_rate": 2.9819482488121136e-05, + "loss": 0.06430435180664062, + "step": 1219 + }, + { + "epoch": 0.08246586453967825, + "grad_norm": 1.340370535850525, + "learning_rate": 2.9818971776722312e-05, + "loss": 0.1107940673828125, + "step": 1220 + }, + { + "epoch": 0.08253345951061242, + "grad_norm": 1.2153233289718628, + "learning_rate": 2.981846034828947e-05, + "loss": 0.15618896484375, + "step": 1221 + }, + { + "epoch": 0.08260105448154657, + "grad_norm": 1.2270041704177856, + "learning_rate": 2.9817948202847368e-05, + "loss": 0.1919403076171875, + "step": 1222 + }, + { + "epoch": 0.08266864945248073, + "grad_norm": 1.6146725416183472, + "learning_rate": 2.9817435340420776e-05, + "loss": 0.1367645263671875, + "step": 1223 + }, + { + "epoch": 0.0827362444234149, + "grad_norm": 0.644270658493042, + "learning_rate": 2.9816921761034515e-05, + "loss": 0.0591278076171875, + "step": 1224 + }, + { + "epoch": 0.08280383939434906, + "grad_norm": 2.8538973331451416, + "learning_rate": 2.9816407464713436e-05, + "loss": 0.2364501953125, + "step": 1225 + }, + { + "epoch": 0.08287143436528323, + "grad_norm": 1.3594285249710083, + "learning_rate": 2.9815892451482417e-05, + "loss": 0.1378173828125, + "step": 1226 + }, + { + "epoch": 0.08293902933621738, + "grad_norm": 0.5587214827537537, + "learning_rate": 2.9815376721366384e-05, + "loss": 0.10329818725585938, + "step": 1227 + }, + { + "epoch": 0.08300662430715154, + "grad_norm": 2.299748420715332, + "learning_rate": 2.981486027439029e-05, + "loss": 0.173553466796875, + "step": 1228 + }, + { + "epoch": 0.08307421927808571, + "grad_norm": 1.3051633834838867, + "learning_rate": 2.9814343110579122e-05, + "loss": 0.1116180419921875, + "step": 1229 + }, + { + "epoch": 0.08314181424901987, + "grad_norm": 0.45730140805244446, + "learning_rate": 2.981382522995791e-05, + "loss": 0.041290283203125, + "step": 1230 + }, + { + "epoch": 0.08320940921995404, + "grad_norm": 0.9879543781280518, + "learning_rate": 2.98133066325517e-05, + "loss": 0.071746826171875, + "step": 1231 + }, + { + "epoch": 0.0832770041908882, + "grad_norm": 1.561829686164856, + "learning_rate": 2.9812787318385595e-05, + "loss": 0.138702392578125, + "step": 1232 + }, + { + "epoch": 0.08334459916182237, + "grad_norm": 0.7045181393623352, + "learning_rate": 2.981226728748472e-05, + "loss": 0.0982208251953125, + "step": 1233 + }, + { + "epoch": 0.08341219413275652, + "grad_norm": 1.1093374490737915, + "learning_rate": 2.981174653987424e-05, + "loss": 0.14530181884765625, + "step": 1234 + }, + { + "epoch": 0.08347978910369068, + "grad_norm": 1.565000295639038, + "learning_rate": 2.9811225075579346e-05, + "loss": 0.29608154296875, + "step": 1235 + }, + { + "epoch": 0.08354738407462485, + "grad_norm": 1.8562242984771729, + "learning_rate": 2.9810702894625273e-05, + "loss": 0.18743896484375, + "step": 1236 + }, + { + "epoch": 0.08361497904555901, + "grad_norm": 0.85418701171875, + "learning_rate": 2.9810179997037285e-05, + "loss": 0.07590484619140625, + "step": 1237 + }, + { + "epoch": 0.08368257401649318, + "grad_norm": 1.808819055557251, + "learning_rate": 2.9809656382840688e-05, + "loss": 0.189422607421875, + "step": 1238 + }, + { + "epoch": 0.08375016898742733, + "grad_norm": 0.9450618624687195, + "learning_rate": 2.9809132052060813e-05, + "loss": 0.09185028076171875, + "step": 1239 + }, + { + "epoch": 0.0838177639583615, + "grad_norm": 0.6195136308670044, + "learning_rate": 2.9808607004723036e-05, + "loss": 0.0972137451171875, + "step": 1240 + }, + { + "epoch": 0.08388535892929566, + "grad_norm": 2.064993381500244, + "learning_rate": 2.9808081240852756e-05, + "loss": 0.2233123779296875, + "step": 1241 + }, + { + "epoch": 0.08395295390022982, + "grad_norm": 1.610664963722229, + "learning_rate": 2.9807554760475416e-05, + "loss": 0.223724365234375, + "step": 1242 + }, + { + "epoch": 0.08402054887116399, + "grad_norm": 0.9693560600280762, + "learning_rate": 2.980702756361649e-05, + "loss": 0.1409912109375, + "step": 1243 + }, + { + "epoch": 0.08408814384209815, + "grad_norm": 0.618140459060669, + "learning_rate": 2.9806499650301482e-05, + "loss": 0.0982818603515625, + "step": 1244 + }, + { + "epoch": 0.08415573881303232, + "grad_norm": 1.247897982597351, + "learning_rate": 2.9805971020555944e-05, + "loss": 0.149566650390625, + "step": 1245 + }, + { + "epoch": 0.08422333378396647, + "grad_norm": 3.737386703491211, + "learning_rate": 2.980544167440545e-05, + "loss": 0.268310546875, + "step": 1246 + }, + { + "epoch": 0.08429092875490063, + "grad_norm": 1.3044321537017822, + "learning_rate": 2.9804911611875615e-05, + "loss": 0.19153594970703125, + "step": 1247 + }, + { + "epoch": 0.0843585237258348, + "grad_norm": 0.7349656224250793, + "learning_rate": 2.9804380832992083e-05, + "loss": 0.1695098876953125, + "step": 1248 + }, + { + "epoch": 0.08442611869676896, + "grad_norm": 0.7769888639450073, + "learning_rate": 2.9803849337780542e-05, + "loss": 0.09978485107421875, + "step": 1249 + }, + { + "epoch": 0.08449371366770313, + "grad_norm": 1.1449787616729736, + "learning_rate": 2.9803317126266706e-05, + "loss": 0.15460205078125, + "step": 1250 + }, + { + "epoch": 0.08456130863863728, + "grad_norm": 1.0255545377731323, + "learning_rate": 2.9802784198476325e-05, + "loss": 0.159393310546875, + "step": 1251 + }, + { + "epoch": 0.08462890360957145, + "grad_norm": 0.7570019960403442, + "learning_rate": 2.9802250554435184e-05, + "loss": 0.13779449462890625, + "step": 1252 + }, + { + "epoch": 0.08469649858050561, + "grad_norm": 1.893444538116455, + "learning_rate": 2.9801716194169113e-05, + "loss": 0.17485809326171875, + "step": 1253 + }, + { + "epoch": 0.08476409355143977, + "grad_norm": 2.204000949859619, + "learning_rate": 2.9801181117703956e-05, + "loss": 0.217254638671875, + "step": 1254 + }, + { + "epoch": 0.08483168852237394, + "grad_norm": 0.9012846350669861, + "learning_rate": 2.9800645325065614e-05, + "loss": 0.05915069580078125, + "step": 1255 + }, + { + "epoch": 0.0848992834933081, + "grad_norm": 0.6848583817481995, + "learning_rate": 2.980010881628e-05, + "loss": 0.08164596557617188, + "step": 1256 + }, + { + "epoch": 0.08496687846424227, + "grad_norm": 0.822017252445221, + "learning_rate": 2.979957159137309e-05, + "loss": 0.0526123046875, + "step": 1257 + }, + { + "epoch": 0.08503447343517642, + "grad_norm": 0.3644077479839325, + "learning_rate": 2.979903365037086e-05, + "loss": 0.0769500732421875, + "step": 1258 + }, + { + "epoch": 0.08510206840611058, + "grad_norm": 0.5207558870315552, + "learning_rate": 2.9798494993299352e-05, + "loss": 0.0892181396484375, + "step": 1259 + }, + { + "epoch": 0.08516966337704475, + "grad_norm": 1.6702067852020264, + "learning_rate": 2.9797955620184627e-05, + "loss": 0.23681640625, + "step": 1260 + }, + { + "epoch": 0.0852372583479789, + "grad_norm": 1.278272271156311, + "learning_rate": 2.9797415531052776e-05, + "loss": 0.09412384033203125, + "step": 1261 + }, + { + "epoch": 0.08530485331891308, + "grad_norm": 0.6121610403060913, + "learning_rate": 2.9796874725929945e-05, + "loss": 0.1201019287109375, + "step": 1262 + }, + { + "epoch": 0.08537244828984723, + "grad_norm": 0.8926201462745667, + "learning_rate": 2.979633320484229e-05, + "loss": 0.1491851806640625, + "step": 1263 + }, + { + "epoch": 0.0854400432607814, + "grad_norm": 0.5224549174308777, + "learning_rate": 2.9795790967816013e-05, + "loss": 0.0992584228515625, + "step": 1264 + }, + { + "epoch": 0.08550763823171556, + "grad_norm": 2.277129650115967, + "learning_rate": 2.979524801487736e-05, + "loss": 0.2016448974609375, + "step": 1265 + }, + { + "epoch": 0.08557523320264972, + "grad_norm": 0.263327032327652, + "learning_rate": 2.97947043460526e-05, + "loss": 0.035579681396484375, + "step": 1266 + }, + { + "epoch": 0.08564282817358389, + "grad_norm": 1.0981897115707397, + "learning_rate": 2.979415996136803e-05, + "loss": 0.21051025390625, + "step": 1267 + }, + { + "epoch": 0.08571042314451804, + "grad_norm": 0.5252063870429993, + "learning_rate": 2.979361486085e-05, + "loss": 0.09075927734375, + "step": 1268 + }, + { + "epoch": 0.08577801811545221, + "grad_norm": 2.482211112976074, + "learning_rate": 2.979306904452488e-05, + "loss": 0.265380859375, + "step": 1269 + }, + { + "epoch": 0.08584561308638637, + "grad_norm": 0.6982547640800476, + "learning_rate": 2.979252251241909e-05, + "loss": 0.1025848388671875, + "step": 1270 + }, + { + "epoch": 0.08591320805732054, + "grad_norm": 1.220247745513916, + "learning_rate": 2.9791975264559063e-05, + "loss": 0.165069580078125, + "step": 1271 + }, + { + "epoch": 0.0859808030282547, + "grad_norm": 0.43024957180023193, + "learning_rate": 2.979142730097128e-05, + "loss": 0.0742645263671875, + "step": 1272 + }, + { + "epoch": 0.08604839799918886, + "grad_norm": 0.34483376145362854, + "learning_rate": 2.979087862168226e-05, + "loss": 0.05152130126953125, + "step": 1273 + }, + { + "epoch": 0.08611599297012303, + "grad_norm": 1.0074121952056885, + "learning_rate": 2.9790329226718544e-05, + "loss": 0.1799774169921875, + "step": 1274 + }, + { + "epoch": 0.08618358794105718, + "grad_norm": 0.7853206396102905, + "learning_rate": 2.978977911610673e-05, + "loss": 0.141815185546875, + "step": 1275 + }, + { + "epoch": 0.08625118291199135, + "grad_norm": 1.2520252466201782, + "learning_rate": 2.9789228289873417e-05, + "loss": 0.264556884765625, + "step": 1276 + }, + { + "epoch": 0.08631877788292551, + "grad_norm": 2.228017807006836, + "learning_rate": 2.9788676748045268e-05, + "loss": 0.1814422607421875, + "step": 1277 + }, + { + "epoch": 0.08638637285385967, + "grad_norm": 0.800837516784668, + "learning_rate": 2.9788124490648967e-05, + "loss": 0.08795166015625, + "step": 1278 + }, + { + "epoch": 0.08645396782479384, + "grad_norm": 1.2241814136505127, + "learning_rate": 2.978757151771124e-05, + "loss": 0.217926025390625, + "step": 1279 + }, + { + "epoch": 0.086521562795728, + "grad_norm": 0.5761352777481079, + "learning_rate": 2.9787017829258836e-05, + "loss": 0.08929443359375, + "step": 1280 + }, + { + "epoch": 0.08658915776666216, + "grad_norm": 0.2777385413646698, + "learning_rate": 2.9786463425318552e-05, + "loss": 0.0525665283203125, + "step": 1281 + }, + { + "epoch": 0.08665675273759632, + "grad_norm": 0.3609611392021179, + "learning_rate": 2.9785908305917212e-05, + "loss": 0.06855010986328125, + "step": 1282 + }, + { + "epoch": 0.08672434770853049, + "grad_norm": 1.7124700546264648, + "learning_rate": 2.9785352471081676e-05, + "loss": 0.1869964599609375, + "step": 1283 + }, + { + "epoch": 0.08679194267946465, + "grad_norm": 1.4708424806594849, + "learning_rate": 2.9784795920838837e-05, + "loss": 0.200225830078125, + "step": 1284 + }, + { + "epoch": 0.0868595376503988, + "grad_norm": 1.0613912343978882, + "learning_rate": 2.9784238655215627e-05, + "loss": 0.15297317504882812, + "step": 1285 + }, + { + "epoch": 0.08692713262133298, + "grad_norm": 0.5553976893424988, + "learning_rate": 2.978368067423901e-05, + "loss": 0.1147918701171875, + "step": 1286 + }, + { + "epoch": 0.08699472759226713, + "grad_norm": 2.1821749210357666, + "learning_rate": 2.978312197793598e-05, + "loss": 0.301025390625, + "step": 1287 + }, + { + "epoch": 0.0870623225632013, + "grad_norm": 1.1702677011489868, + "learning_rate": 2.9782562566333575e-05, + "loss": 0.13896942138671875, + "step": 1288 + }, + { + "epoch": 0.08712991753413546, + "grad_norm": 0.9066067934036255, + "learning_rate": 2.978200243945886e-05, + "loss": 0.0889739990234375, + "step": 1289 + }, + { + "epoch": 0.08719751250506962, + "grad_norm": 1.6922136545181274, + "learning_rate": 2.9781441597338942e-05, + "loss": 0.167510986328125, + "step": 1290 + }, + { + "epoch": 0.08726510747600379, + "grad_norm": 0.6187591552734375, + "learning_rate": 2.978088004000095e-05, + "loss": 0.0682830810546875, + "step": 1291 + }, + { + "epoch": 0.08733270244693794, + "grad_norm": 0.8485522270202637, + "learning_rate": 2.9780317767472064e-05, + "loss": 0.1171722412109375, + "step": 1292 + }, + { + "epoch": 0.08740029741787211, + "grad_norm": 0.953166663646698, + "learning_rate": 2.9779754779779487e-05, + "loss": 0.1555633544921875, + "step": 1293 + }, + { + "epoch": 0.08746789238880627, + "grad_norm": 0.7116652131080627, + "learning_rate": 2.9779191076950456e-05, + "loss": 0.1348876953125, + "step": 1294 + }, + { + "epoch": 0.08753548735974044, + "grad_norm": 3.034026861190796, + "learning_rate": 2.9778626659012254e-05, + "loss": 0.33502197265625, + "step": 1295 + }, + { + "epoch": 0.0876030823306746, + "grad_norm": 1.723760724067688, + "learning_rate": 2.9778061525992182e-05, + "loss": 0.14849090576171875, + "step": 1296 + }, + { + "epoch": 0.08767067730160875, + "grad_norm": 0.3062436878681183, + "learning_rate": 2.9777495677917592e-05, + "loss": 0.03626251220703125, + "step": 1297 + }, + { + "epoch": 0.08773827227254292, + "grad_norm": 1.6775259971618652, + "learning_rate": 2.9776929114815864e-05, + "loss": 0.15716552734375, + "step": 1298 + }, + { + "epoch": 0.08780586724347708, + "grad_norm": 1.1017019748687744, + "learning_rate": 2.9776361836714404e-05, + "loss": 0.174713134765625, + "step": 1299 + }, + { + "epoch": 0.08787346221441125, + "grad_norm": 1.012880563735962, + "learning_rate": 2.977579384364067e-05, + "loss": 0.13542556762695312, + "step": 1300 + }, + { + "epoch": 0.08794105718534541, + "grad_norm": 0.35763019323349, + "learning_rate": 2.9775225135622136e-05, + "loss": 0.0478363037109375, + "step": 1301 + }, + { + "epoch": 0.08800865215627958, + "grad_norm": 0.5767934918403625, + "learning_rate": 2.9774655712686324e-05, + "loss": 0.13714599609375, + "step": 1302 + }, + { + "epoch": 0.08807624712721374, + "grad_norm": 0.4093684256076813, + "learning_rate": 2.9774085574860786e-05, + "loss": 0.036426544189453125, + "step": 1303 + }, + { + "epoch": 0.08814384209814789, + "grad_norm": 0.8364496231079102, + "learning_rate": 2.977351472217311e-05, + "loss": 0.128509521484375, + "step": 1304 + }, + { + "epoch": 0.08821143706908206, + "grad_norm": 3.0184192657470703, + "learning_rate": 2.9772943154650918e-05, + "loss": 0.250091552734375, + "step": 1305 + }, + { + "epoch": 0.08827903204001622, + "grad_norm": 0.39316871762275696, + "learning_rate": 2.977237087232186e-05, + "loss": 0.08010101318359375, + "step": 1306 + }, + { + "epoch": 0.08834662701095039, + "grad_norm": 0.2939225733280182, + "learning_rate": 2.9771797875213635e-05, + "loss": 0.05490875244140625, + "step": 1307 + }, + { + "epoch": 0.08841422198188455, + "grad_norm": 1.7241536378860474, + "learning_rate": 2.9771224163353963e-05, + "loss": 0.195587158203125, + "step": 1308 + }, + { + "epoch": 0.0884818169528187, + "grad_norm": 0.43001458048820496, + "learning_rate": 2.9770649736770603e-05, + "loss": 0.04587554931640625, + "step": 1309 + }, + { + "epoch": 0.08854941192375287, + "grad_norm": 0.4223093092441559, + "learning_rate": 2.977007459549135e-05, + "loss": 0.0709228515625, + "step": 1310 + }, + { + "epoch": 0.08861700689468703, + "grad_norm": 1.2086726427078247, + "learning_rate": 2.9769498739544037e-05, + "loss": 0.1434173583984375, + "step": 1311 + }, + { + "epoch": 0.0886846018656212, + "grad_norm": 0.5762428641319275, + "learning_rate": 2.9768922168956523e-05, + "loss": 0.069305419921875, + "step": 1312 + }, + { + "epoch": 0.08875219683655536, + "grad_norm": 0.6908305287361145, + "learning_rate": 2.9768344883756706e-05, + "loss": 0.07616233825683594, + "step": 1313 + }, + { + "epoch": 0.08881979180748953, + "grad_norm": 1.3476790189743042, + "learning_rate": 2.9767766883972525e-05, + "loss": 0.1796722412109375, + "step": 1314 + }, + { + "epoch": 0.08888738677842369, + "grad_norm": 0.6779069900512695, + "learning_rate": 2.9767188169631937e-05, + "loss": 0.07867813110351562, + "step": 1315 + }, + { + "epoch": 0.08895498174935784, + "grad_norm": 0.39937421679496765, + "learning_rate": 2.976660874076295e-05, + "loss": 0.05921173095703125, + "step": 1316 + }, + { + "epoch": 0.08902257672029201, + "grad_norm": 0.6178044080734253, + "learning_rate": 2.9766028597393604e-05, + "loss": 0.03977203369140625, + "step": 1317 + }, + { + "epoch": 0.08909017169122617, + "grad_norm": 0.4811696410179138, + "learning_rate": 2.976544773955196e-05, + "loss": 0.072845458984375, + "step": 1318 + }, + { + "epoch": 0.08915776666216034, + "grad_norm": 0.5796242952346802, + "learning_rate": 2.976486616726613e-05, + "loss": 0.052703857421875, + "step": 1319 + }, + { + "epoch": 0.0892253616330945, + "grad_norm": 0.7454965710639954, + "learning_rate": 2.976428388056426e-05, + "loss": 0.10506439208984375, + "step": 1320 + }, + { + "epoch": 0.08929295660402867, + "grad_norm": 3.5694074630737305, + "learning_rate": 2.976370087947451e-05, + "loss": 0.1835479736328125, + "step": 1321 + }, + { + "epoch": 0.08936055157496282, + "grad_norm": 1.1565674543380737, + "learning_rate": 2.97631171640251e-05, + "loss": 0.18121337890625, + "step": 1322 + }, + { + "epoch": 0.08942814654589698, + "grad_norm": 0.4581379294395447, + "learning_rate": 2.9762532734244272e-05, + "loss": 0.0687255859375, + "step": 1323 + }, + { + "epoch": 0.08949574151683115, + "grad_norm": 0.8648463487625122, + "learning_rate": 2.9761947590160298e-05, + "loss": 0.1665191650390625, + "step": 1324 + }, + { + "epoch": 0.08956333648776531, + "grad_norm": 1.6495792865753174, + "learning_rate": 2.97613617318015e-05, + "loss": 0.1127471923828125, + "step": 1325 + }, + { + "epoch": 0.08963093145869948, + "grad_norm": 3.2447750568389893, + "learning_rate": 2.9760775159196224e-05, + "loss": 0.1906890869140625, + "step": 1326 + }, + { + "epoch": 0.08969852642963363, + "grad_norm": 1.522300362586975, + "learning_rate": 2.9760187872372847e-05, + "loss": 0.13480377197265625, + "step": 1327 + }, + { + "epoch": 0.08976612140056779, + "grad_norm": 0.7869395613670349, + "learning_rate": 2.9759599871359787e-05, + "loss": 0.12397193908691406, + "step": 1328 + }, + { + "epoch": 0.08983371637150196, + "grad_norm": 0.8729008436203003, + "learning_rate": 2.9759011156185497e-05, + "loss": 0.0819854736328125, + "step": 1329 + }, + { + "epoch": 0.08990131134243612, + "grad_norm": 0.6624216437339783, + "learning_rate": 2.975842172687846e-05, + "loss": 0.10900115966796875, + "step": 1330 + }, + { + "epoch": 0.08996890631337029, + "grad_norm": 0.5508986711502075, + "learning_rate": 2.97578315834672e-05, + "loss": 0.1262664794921875, + "step": 1331 + }, + { + "epoch": 0.09003650128430445, + "grad_norm": 0.9102309942245483, + "learning_rate": 2.975724072598027e-05, + "loss": 0.1159515380859375, + "step": 1332 + }, + { + "epoch": 0.09010409625523862, + "grad_norm": 1.316877007484436, + "learning_rate": 2.975664915444626e-05, + "loss": 0.09192657470703125, + "step": 1333 + }, + { + "epoch": 0.09017169122617277, + "grad_norm": 1.1928707361221313, + "learning_rate": 2.975605686889379e-05, + "loss": 0.17380523681640625, + "step": 1334 + }, + { + "epoch": 0.09023928619710693, + "grad_norm": 0.7090555429458618, + "learning_rate": 2.9755463869351526e-05, + "loss": 0.044673919677734375, + "step": 1335 + }, + { + "epoch": 0.0903068811680411, + "grad_norm": 0.20378555357456207, + "learning_rate": 2.9754870155848157e-05, + "loss": 0.031045913696289062, + "step": 1336 + }, + { + "epoch": 0.09037447613897526, + "grad_norm": 1.9209566116333008, + "learning_rate": 2.9754275728412406e-05, + "loss": 0.15582275390625, + "step": 1337 + }, + { + "epoch": 0.09044207110990943, + "grad_norm": 0.8888643383979797, + "learning_rate": 2.975368058707304e-05, + "loss": 0.085205078125, + "step": 1338 + }, + { + "epoch": 0.09050966608084358, + "grad_norm": 0.2519262433052063, + "learning_rate": 2.975308473185886e-05, + "loss": 0.039386749267578125, + "step": 1339 + }, + { + "epoch": 0.09057726105177774, + "grad_norm": 1.0810391902923584, + "learning_rate": 2.975248816279869e-05, + "loss": 0.12325286865234375, + "step": 1340 + }, + { + "epoch": 0.09064485602271191, + "grad_norm": 0.2645905017852783, + "learning_rate": 2.9751890879921395e-05, + "loss": 0.024883270263671875, + "step": 1341 + }, + { + "epoch": 0.09071245099364607, + "grad_norm": 1.728817105293274, + "learning_rate": 2.9751292883255883e-05, + "loss": 0.1390380859375, + "step": 1342 + }, + { + "epoch": 0.09078004596458024, + "grad_norm": 0.7759442925453186, + "learning_rate": 2.975069417283108e-05, + "loss": 0.14530181884765625, + "step": 1343 + }, + { + "epoch": 0.0908476409355144, + "grad_norm": 0.3774062395095825, + "learning_rate": 2.975009474867596e-05, + "loss": 0.068206787109375, + "step": 1344 + }, + { + "epoch": 0.09091523590644857, + "grad_norm": 0.7968466877937317, + "learning_rate": 2.9749494610819528e-05, + "loss": 0.049610137939453125, + "step": 1345 + }, + { + "epoch": 0.09098283087738272, + "grad_norm": 0.8082307577133179, + "learning_rate": 2.9748893759290817e-05, + "loss": 0.085479736328125, + "step": 1346 + }, + { + "epoch": 0.09105042584831688, + "grad_norm": 1.3126106262207031, + "learning_rate": 2.974829219411891e-05, + "loss": 0.1831512451171875, + "step": 1347 + }, + { + "epoch": 0.09111802081925105, + "grad_norm": 1.5769503116607666, + "learning_rate": 2.9747689915332903e-05, + "loss": 0.198760986328125, + "step": 1348 + }, + { + "epoch": 0.0911856157901852, + "grad_norm": 1.1001299619674683, + "learning_rate": 2.9747086922961943e-05, + "loss": 0.21514892578125, + "step": 1349 + }, + { + "epoch": 0.09125321076111938, + "grad_norm": 0.9861624240875244, + "learning_rate": 2.974648321703521e-05, + "loss": 0.1405792236328125, + "step": 1350 + }, + { + "epoch": 0.09132080573205353, + "grad_norm": 1.4792431592941284, + "learning_rate": 2.9745878797581906e-05, + "loss": 0.18511962890625, + "step": 1351 + }, + { + "epoch": 0.0913884007029877, + "grad_norm": 0.3844854533672333, + "learning_rate": 2.9745273664631284e-05, + "loss": 0.06000518798828125, + "step": 1352 + }, + { + "epoch": 0.09145599567392186, + "grad_norm": 2.037497043609619, + "learning_rate": 2.9744667818212624e-05, + "loss": 0.11658096313476562, + "step": 1353 + }, + { + "epoch": 0.09152359064485602, + "grad_norm": 2.0876851081848145, + "learning_rate": 2.9744061258355236e-05, + "loss": 0.208587646484375, + "step": 1354 + }, + { + "epoch": 0.09159118561579019, + "grad_norm": 2.0192434787750244, + "learning_rate": 2.9743453985088478e-05, + "loss": 0.21553802490234375, + "step": 1355 + }, + { + "epoch": 0.09165878058672434, + "grad_norm": 0.8236271739006042, + "learning_rate": 2.9742845998441722e-05, + "loss": 0.1371307373046875, + "step": 1356 + }, + { + "epoch": 0.09172637555765852, + "grad_norm": 2.234147548675537, + "learning_rate": 2.9742237298444398e-05, + "loss": 0.335723876953125, + "step": 1357 + }, + { + "epoch": 0.09179397052859267, + "grad_norm": 2.8095738887786865, + "learning_rate": 2.9741627885125944e-05, + "loss": 0.27886962890625, + "step": 1358 + }, + { + "epoch": 0.09186156549952683, + "grad_norm": 1.2954596281051636, + "learning_rate": 2.974101775851586e-05, + "loss": 0.11492919921875, + "step": 1359 + }, + { + "epoch": 0.091929160470461, + "grad_norm": 1.4027283191680908, + "learning_rate": 2.9740406918643664e-05, + "loss": 0.22705078125, + "step": 1360 + }, + { + "epoch": 0.09199675544139516, + "grad_norm": 2.9689383506774902, + "learning_rate": 2.9739795365538914e-05, + "loss": 0.211456298828125, + "step": 1361 + }, + { + "epoch": 0.09206435041232933, + "grad_norm": 0.5503345727920532, + "learning_rate": 2.9739183099231192e-05, + "loss": 0.121246337890625, + "step": 1362 + }, + { + "epoch": 0.09213194538326348, + "grad_norm": 0.8923593163490295, + "learning_rate": 2.973857011975013e-05, + "loss": 0.09320068359375, + "step": 1363 + }, + { + "epoch": 0.09219954035419765, + "grad_norm": 0.9077703356742859, + "learning_rate": 2.9737956427125395e-05, + "loss": 0.09674072265625, + "step": 1364 + }, + { + "epoch": 0.09226713532513181, + "grad_norm": 0.32987844944000244, + "learning_rate": 2.973734202138667e-05, + "loss": 0.04378700256347656, + "step": 1365 + }, + { + "epoch": 0.09233473029606597, + "grad_norm": 3.7365365028381348, + "learning_rate": 2.9736726902563686e-05, + "loss": 0.239013671875, + "step": 1366 + }, + { + "epoch": 0.09240232526700014, + "grad_norm": 3.3612189292907715, + "learning_rate": 2.973611107068621e-05, + "loss": 0.24285888671875, + "step": 1367 + }, + { + "epoch": 0.0924699202379343, + "grad_norm": 1.2158023118972778, + "learning_rate": 2.9735494525784034e-05, + "loss": 0.214813232421875, + "step": 1368 + }, + { + "epoch": 0.09253751520886846, + "grad_norm": 0.7696966528892517, + "learning_rate": 2.9734877267886996e-05, + "loss": 0.1065826416015625, + "step": 1369 + }, + { + "epoch": 0.09260511017980262, + "grad_norm": 0.9881528615951538, + "learning_rate": 2.9734259297024963e-05, + "loss": 0.08994293212890625, + "step": 1370 + }, + { + "epoch": 0.09267270515073678, + "grad_norm": 2.6971585750579834, + "learning_rate": 2.9733640613227827e-05, + "loss": 0.196380615234375, + "step": 1371 + }, + { + "epoch": 0.09274030012167095, + "grad_norm": 0.9472489356994629, + "learning_rate": 2.9733021216525537e-05, + "loss": 0.0804901123046875, + "step": 1372 + }, + { + "epoch": 0.0928078950926051, + "grad_norm": 1.8170472383499146, + "learning_rate": 2.9732401106948053e-05, + "loss": 0.12085723876953125, + "step": 1373 + }, + { + "epoch": 0.09287549006353928, + "grad_norm": 1.562515377998352, + "learning_rate": 2.9731780284525385e-05, + "loss": 0.19156646728515625, + "step": 1374 + }, + { + "epoch": 0.09294308503447343, + "grad_norm": 1.5690046548843384, + "learning_rate": 2.9731158749287574e-05, + "loss": 0.21695709228515625, + "step": 1375 + }, + { + "epoch": 0.0930106800054076, + "grad_norm": 1.202652931213379, + "learning_rate": 2.9730536501264684e-05, + "loss": 0.19586181640625, + "step": 1376 + }, + { + "epoch": 0.09307827497634176, + "grad_norm": 0.7604442238807678, + "learning_rate": 2.9729913540486835e-05, + "loss": 0.1158905029296875, + "step": 1377 + }, + { + "epoch": 0.09314586994727592, + "grad_norm": 0.782812237739563, + "learning_rate": 2.972928986698416e-05, + "loss": 0.127471923828125, + "step": 1378 + }, + { + "epoch": 0.09321346491821009, + "grad_norm": 0.8878123164176941, + "learning_rate": 2.9728665480786848e-05, + "loss": 0.17950439453125, + "step": 1379 + }, + { + "epoch": 0.09328105988914424, + "grad_norm": 1.2569007873535156, + "learning_rate": 2.97280403819251e-05, + "loss": 0.1011962890625, + "step": 1380 + }, + { + "epoch": 0.09334865486007841, + "grad_norm": 0.3792286217212677, + "learning_rate": 2.9727414570429162e-05, + "loss": 0.032848358154296875, + "step": 1381 + }, + { + "epoch": 0.09341624983101257, + "grad_norm": 0.4449114501476288, + "learning_rate": 2.9726788046329323e-05, + "loss": 0.0894775390625, + "step": 1382 + }, + { + "epoch": 0.09348384480194674, + "grad_norm": 1.1539627313613892, + "learning_rate": 2.972616080965589e-05, + "loss": 0.173004150390625, + "step": 1383 + }, + { + "epoch": 0.0935514397728809, + "grad_norm": 1.3576452732086182, + "learning_rate": 2.972553286043922e-05, + "loss": 0.12911224365234375, + "step": 1384 + }, + { + "epoch": 0.09361903474381505, + "grad_norm": 1.412084698677063, + "learning_rate": 2.972490419870969e-05, + "loss": 0.1213531494140625, + "step": 1385 + }, + { + "epoch": 0.09368662971474923, + "grad_norm": 0.22099925577640533, + "learning_rate": 2.9724274824497722e-05, + "loss": 0.02988433837890625, + "step": 1386 + }, + { + "epoch": 0.09375422468568338, + "grad_norm": 1.0214548110961914, + "learning_rate": 2.972364473783377e-05, + "loss": 0.075408935546875, + "step": 1387 + }, + { + "epoch": 0.09382181965661755, + "grad_norm": 0.6403893232345581, + "learning_rate": 2.972301393874832e-05, + "loss": 0.1089630126953125, + "step": 1388 + }, + { + "epoch": 0.09388941462755171, + "grad_norm": 0.930510401725769, + "learning_rate": 2.9722382427271895e-05, + "loss": 0.1668243408203125, + "step": 1389 + }, + { + "epoch": 0.09395700959848587, + "grad_norm": 0.4501977264881134, + "learning_rate": 2.9721750203435044e-05, + "loss": 0.04669952392578125, + "step": 1390 + }, + { + "epoch": 0.09402460456942004, + "grad_norm": 0.7206888198852539, + "learning_rate": 2.9721117267268374e-05, + "loss": 0.04978179931640625, + "step": 1391 + }, + { + "epoch": 0.0940921995403542, + "grad_norm": 1.6091865301132202, + "learning_rate": 2.9720483618802496e-05, + "loss": 0.156158447265625, + "step": 1392 + }, + { + "epoch": 0.09415979451128836, + "grad_norm": 0.5133570432662964, + "learning_rate": 2.9719849258068076e-05, + "loss": 0.03993988037109375, + "step": 1393 + }, + { + "epoch": 0.09422738948222252, + "grad_norm": 0.6708387136459351, + "learning_rate": 2.9719214185095805e-05, + "loss": 0.1080322265625, + "step": 1394 + }, + { + "epoch": 0.09429498445315669, + "grad_norm": 1.3061120510101318, + "learning_rate": 2.9718578399916415e-05, + "loss": 0.217681884765625, + "step": 1395 + }, + { + "epoch": 0.09436257942409085, + "grad_norm": 0.7177231907844543, + "learning_rate": 2.9717941902560667e-05, + "loss": 0.130584716796875, + "step": 1396 + }, + { + "epoch": 0.094430174395025, + "grad_norm": 0.8667570948600769, + "learning_rate": 2.9717304693059364e-05, + "loss": 0.0695648193359375, + "step": 1397 + }, + { + "epoch": 0.09449776936595917, + "grad_norm": 0.6383411884307861, + "learning_rate": 2.971666677144333e-05, + "loss": 0.1131744384765625, + "step": 1398 + }, + { + "epoch": 0.09456536433689333, + "grad_norm": 1.2511183023452759, + "learning_rate": 2.9716028137743442e-05, + "loss": 0.16115570068359375, + "step": 1399 + }, + { + "epoch": 0.0946329593078275, + "grad_norm": 1.0827360153198242, + "learning_rate": 2.971538879199059e-05, + "loss": 0.214996337890625, + "step": 1400 + }, + { + "epoch": 0.09470055427876166, + "grad_norm": 2.4898126125335693, + "learning_rate": 2.9714748734215714e-05, + "loss": 0.275787353515625, + "step": 1401 + }, + { + "epoch": 0.09476814924969583, + "grad_norm": 0.32656073570251465, + "learning_rate": 2.9714107964449782e-05, + "loss": 0.0729522705078125, + "step": 1402 + }, + { + "epoch": 0.09483574422062999, + "grad_norm": 1.0549904108047485, + "learning_rate": 2.9713466482723802e-05, + "loss": 0.174713134765625, + "step": 1403 + }, + { + "epoch": 0.09490333919156414, + "grad_norm": 1.1946961879730225, + "learning_rate": 2.9712824289068814e-05, + "loss": 0.20501708984375, + "step": 1404 + }, + { + "epoch": 0.09497093416249831, + "grad_norm": 1.0518321990966797, + "learning_rate": 2.9712181383515886e-05, + "loss": 0.247314453125, + "step": 1405 + }, + { + "epoch": 0.09503852913343247, + "grad_norm": 1.1428358554840088, + "learning_rate": 2.9711537766096133e-05, + "loss": 0.2099609375, + "step": 1406 + }, + { + "epoch": 0.09510612410436664, + "grad_norm": 0.5087500810623169, + "learning_rate": 2.971089343684069e-05, + "loss": 0.06343841552734375, + "step": 1407 + }, + { + "epoch": 0.0951737190753008, + "grad_norm": 1.994251012802124, + "learning_rate": 2.9710248395780732e-05, + "loss": 0.31787109375, + "step": 1408 + }, + { + "epoch": 0.09524131404623495, + "grad_norm": 0.6023617386817932, + "learning_rate": 2.970960264294748e-05, + "loss": 0.0876617431640625, + "step": 1409 + }, + { + "epoch": 0.09530890901716912, + "grad_norm": 4.248227596282959, + "learning_rate": 2.970895617837217e-05, + "loss": 0.2904052734375, + "step": 1410 + }, + { + "epoch": 0.09537650398810328, + "grad_norm": 0.4928339719772339, + "learning_rate": 2.9708309002086092e-05, + "loss": 0.06890869140625, + "step": 1411 + }, + { + "epoch": 0.09544409895903745, + "grad_norm": 1.8904529809951782, + "learning_rate": 2.9707661114120553e-05, + "loss": 0.2288360595703125, + "step": 1412 + }, + { + "epoch": 0.09551169392997161, + "grad_norm": 0.9706199169158936, + "learning_rate": 2.97070125145069e-05, + "loss": 0.15106201171875, + "step": 1413 + }, + { + "epoch": 0.09557928890090578, + "grad_norm": 0.5579634308815002, + "learning_rate": 2.970636320327652e-05, + "loss": 0.110015869140625, + "step": 1414 + }, + { + "epoch": 0.09564688387183994, + "grad_norm": 3.041527271270752, + "learning_rate": 2.9705713180460835e-05, + "loss": 0.17597579956054688, + "step": 1415 + }, + { + "epoch": 0.09571447884277409, + "grad_norm": 2.2164249420166016, + "learning_rate": 2.9705062446091288e-05, + "loss": 0.205718994140625, + "step": 1416 + }, + { + "epoch": 0.09578207381370826, + "grad_norm": 1.068399429321289, + "learning_rate": 2.970441100019937e-05, + "loss": 0.11199951171875, + "step": 1417 + }, + { + "epoch": 0.09584966878464242, + "grad_norm": 6.401825904846191, + "learning_rate": 2.9703758842816604e-05, + "loss": 0.346527099609375, + "step": 1418 + }, + { + "epoch": 0.09591726375557659, + "grad_norm": 1.6073360443115234, + "learning_rate": 2.9703105973974543e-05, + "loss": 0.154022216796875, + "step": 1419 + }, + { + "epoch": 0.09598485872651075, + "grad_norm": 1.4608865976333618, + "learning_rate": 2.9702452393704777e-05, + "loss": 0.07769775390625, + "step": 1420 + }, + { + "epoch": 0.0960524536974449, + "grad_norm": 0.8589938879013062, + "learning_rate": 2.970179810203893e-05, + "loss": 0.1285400390625, + "step": 1421 + }, + { + "epoch": 0.09612004866837907, + "grad_norm": 2.334901809692383, + "learning_rate": 2.970114309900866e-05, + "loss": 0.188934326171875, + "step": 1422 + }, + { + "epoch": 0.09618764363931323, + "grad_norm": 3.6397125720977783, + "learning_rate": 2.9700487384645667e-05, + "loss": 0.206024169921875, + "step": 1423 + }, + { + "epoch": 0.0962552386102474, + "grad_norm": 2.096461296081543, + "learning_rate": 2.9699830958981672e-05, + "loss": 0.233123779296875, + "step": 1424 + }, + { + "epoch": 0.09632283358118156, + "grad_norm": 1.3012359142303467, + "learning_rate": 2.969917382204843e-05, + "loss": 0.27239990234375, + "step": 1425 + }, + { + "epoch": 0.09639042855211573, + "grad_norm": 2.1192867755889893, + "learning_rate": 2.9698515973877752e-05, + "loss": 0.156280517578125, + "step": 1426 + }, + { + "epoch": 0.09645802352304989, + "grad_norm": 3.6958999633789062, + "learning_rate": 2.9697857414501463e-05, + "loss": 0.1825714111328125, + "step": 1427 + }, + { + "epoch": 0.09652561849398404, + "grad_norm": 3.794975996017456, + "learning_rate": 2.9697198143951425e-05, + "loss": 0.1773529052734375, + "step": 1428 + }, + { + "epoch": 0.09659321346491821, + "grad_norm": 5.611673831939697, + "learning_rate": 2.969653816225954e-05, + "loss": 0.26800537109375, + "step": 1429 + }, + { + "epoch": 0.09666080843585237, + "grad_norm": 1.9642959833145142, + "learning_rate": 2.969587746945774e-05, + "loss": 0.130523681640625, + "step": 1430 + }, + { + "epoch": 0.09672840340678654, + "grad_norm": 3.1247594356536865, + "learning_rate": 2.9695216065577996e-05, + "loss": 0.233154296875, + "step": 1431 + }, + { + "epoch": 0.0967959983777207, + "grad_norm": 0.4959847331047058, + "learning_rate": 2.9694553950652317e-05, + "loss": 0.07472991943359375, + "step": 1432 + }, + { + "epoch": 0.09686359334865487, + "grad_norm": 0.46313798427581787, + "learning_rate": 2.9693891124712726e-05, + "loss": 0.10022735595703125, + "step": 1433 + }, + { + "epoch": 0.09693118831958902, + "grad_norm": 1.7830238342285156, + "learning_rate": 2.96932275877913e-05, + "loss": 0.12290573120117188, + "step": 1434 + }, + { + "epoch": 0.09699878329052318, + "grad_norm": 3.503053903579712, + "learning_rate": 2.9692563339920152e-05, + "loss": 0.19854736328125, + "step": 1435 + }, + { + "epoch": 0.09706637826145735, + "grad_norm": 1.4433391094207764, + "learning_rate": 2.9691898381131417e-05, + "loss": 0.11460113525390625, + "step": 1436 + }, + { + "epoch": 0.09713397323239151, + "grad_norm": 0.563029944896698, + "learning_rate": 2.9691232711457273e-05, + "loss": 0.036426544189453125, + "step": 1437 + }, + { + "epoch": 0.09720156820332568, + "grad_norm": 0.7747427821159363, + "learning_rate": 2.969056633092992e-05, + "loss": 0.07260894775390625, + "step": 1438 + }, + { + "epoch": 0.09726916317425983, + "grad_norm": 0.6308704614639282, + "learning_rate": 2.968989923958161e-05, + "loss": 0.1139068603515625, + "step": 1439 + }, + { + "epoch": 0.09733675814519399, + "grad_norm": 0.3990083336830139, + "learning_rate": 2.9689231437444624e-05, + "loss": 0.041797637939453125, + "step": 1440 + }, + { + "epoch": 0.09740435311612816, + "grad_norm": 1.1911208629608154, + "learning_rate": 2.9688562924551266e-05, + "loss": 0.1409149169921875, + "step": 1441 + }, + { + "epoch": 0.09747194808706232, + "grad_norm": 1.252893090248108, + "learning_rate": 2.968789370093389e-05, + "loss": 0.15650177001953125, + "step": 1442 + }, + { + "epoch": 0.09753954305799649, + "grad_norm": 0.603314995765686, + "learning_rate": 2.968722376662487e-05, + "loss": 0.0677032470703125, + "step": 1443 + }, + { + "epoch": 0.09760713802893065, + "grad_norm": 2.8679072856903076, + "learning_rate": 2.9686553121656624e-05, + "loss": 0.21331787109375, + "step": 1444 + }, + { + "epoch": 0.09767473299986482, + "grad_norm": 1.631678581237793, + "learning_rate": 2.968588176606161e-05, + "loss": 0.1624908447265625, + "step": 1445 + }, + { + "epoch": 0.09774232797079897, + "grad_norm": 1.0237812995910645, + "learning_rate": 2.9685209699872302e-05, + "loss": 0.13714599609375, + "step": 1446 + }, + { + "epoch": 0.09780992294173313, + "grad_norm": 1.4434282779693604, + "learning_rate": 2.968453692312122e-05, + "loss": 0.174713134765625, + "step": 1447 + }, + { + "epoch": 0.0978775179126673, + "grad_norm": 0.5072821378707886, + "learning_rate": 2.9683863435840923e-05, + "loss": 0.10076904296875, + "step": 1448 + }, + { + "epoch": 0.09794511288360146, + "grad_norm": 1.2218255996704102, + "learning_rate": 2.968318923806399e-05, + "loss": 0.14984130859375, + "step": 1449 + }, + { + "epoch": 0.09801270785453563, + "grad_norm": 1.1206090450286865, + "learning_rate": 2.9682514329823053e-05, + "loss": 0.1982421875, + "step": 1450 + }, + { + "epoch": 0.09808030282546978, + "grad_norm": 0.9842045903205872, + "learning_rate": 2.9681838711150758e-05, + "loss": 0.18475341796875, + "step": 1451 + }, + { + "epoch": 0.09814789779640394, + "grad_norm": 0.4409520924091339, + "learning_rate": 2.9681162382079804e-05, + "loss": 0.04141998291015625, + "step": 1452 + }, + { + "epoch": 0.09821549276733811, + "grad_norm": 1.648484230041504, + "learning_rate": 2.9680485342642913e-05, + "loss": 0.19698333740234375, + "step": 1453 + }, + { + "epoch": 0.09828308773827227, + "grad_norm": 1.796099305152893, + "learning_rate": 2.9679807592872847e-05, + "loss": 0.16713714599609375, + "step": 1454 + }, + { + "epoch": 0.09835068270920644, + "grad_norm": 0.9728620648384094, + "learning_rate": 2.9679129132802394e-05, + "loss": 0.0754241943359375, + "step": 1455 + }, + { + "epoch": 0.0984182776801406, + "grad_norm": 0.7987562417984009, + "learning_rate": 2.9678449962464386e-05, + "loss": 0.09902572631835938, + "step": 1456 + }, + { + "epoch": 0.09848587265107477, + "grad_norm": 1.0233919620513916, + "learning_rate": 2.967777008189168e-05, + "loss": 0.17193603515625, + "step": 1457 + }, + { + "epoch": 0.09855346762200892, + "grad_norm": 1.2854770421981812, + "learning_rate": 2.9677089491117185e-05, + "loss": 0.222320556640625, + "step": 1458 + }, + { + "epoch": 0.09862106259294308, + "grad_norm": 1.2439464330673218, + "learning_rate": 2.967640819017382e-05, + "loss": 0.1556549072265625, + "step": 1459 + }, + { + "epoch": 0.09868865756387725, + "grad_norm": 1.4703466892242432, + "learning_rate": 2.9675726179094555e-05, + "loss": 0.23504638671875, + "step": 1460 + }, + { + "epoch": 0.0987562525348114, + "grad_norm": 1.1101058721542358, + "learning_rate": 2.967504345791239e-05, + "loss": 0.16827392578125, + "step": 1461 + }, + { + "epoch": 0.09882384750574558, + "grad_norm": 0.7119890451431274, + "learning_rate": 2.967436002666036e-05, + "loss": 0.05176544189453125, + "step": 1462 + }, + { + "epoch": 0.09889144247667973, + "grad_norm": 1.486289143562317, + "learning_rate": 2.9673675885371534e-05, + "loss": 0.197357177734375, + "step": 1463 + }, + { + "epoch": 0.0989590374476139, + "grad_norm": 0.9680827260017395, + "learning_rate": 2.9672991034079013e-05, + "loss": 0.062469482421875, + "step": 1464 + }, + { + "epoch": 0.09902663241854806, + "grad_norm": 1.3892024755477905, + "learning_rate": 2.9672305472815933e-05, + "loss": 0.2645263671875, + "step": 1465 + }, + { + "epoch": 0.09909422738948222, + "grad_norm": 1.9910848140716553, + "learning_rate": 2.9671619201615472e-05, + "loss": 0.1583099365234375, + "step": 1466 + }, + { + "epoch": 0.09916182236041639, + "grad_norm": 1.165511131286621, + "learning_rate": 2.967093222051083e-05, + "loss": 0.09793853759765625, + "step": 1467 + }, + { + "epoch": 0.09922941733135054, + "grad_norm": 1.1719236373901367, + "learning_rate": 2.967024452953525e-05, + "loss": 0.0943756103515625, + "step": 1468 + }, + { + "epoch": 0.09929701230228472, + "grad_norm": 0.2717887759208679, + "learning_rate": 2.966955612872201e-05, + "loss": 0.040561676025390625, + "step": 1469 + }, + { + "epoch": 0.09936460727321887, + "grad_norm": 0.6897019743919373, + "learning_rate": 2.9668867018104407e-05, + "loss": 0.064666748046875, + "step": 1470 + }, + { + "epoch": 0.09943220224415303, + "grad_norm": 1.5760283470153809, + "learning_rate": 2.96681771977158e-05, + "loss": 0.184112548828125, + "step": 1471 + }, + { + "epoch": 0.0994997972150872, + "grad_norm": 1.1091772317886353, + "learning_rate": 2.9667486667589558e-05, + "loss": 0.17305374145507812, + "step": 1472 + }, + { + "epoch": 0.09956739218602136, + "grad_norm": 1.0871005058288574, + "learning_rate": 2.9666795427759097e-05, + "loss": 0.1798858642578125, + "step": 1473 + }, + { + "epoch": 0.09963498715695553, + "grad_norm": 1.4015367031097412, + "learning_rate": 2.966610347825786e-05, + "loss": 0.1126861572265625, + "step": 1474 + }, + { + "epoch": 0.09970258212788968, + "grad_norm": 0.7933489680290222, + "learning_rate": 2.966541081911933e-05, + "loss": 0.14459991455078125, + "step": 1475 + }, + { + "epoch": 0.09977017709882385, + "grad_norm": 1.2965376377105713, + "learning_rate": 2.966471745037702e-05, + "loss": 0.192962646484375, + "step": 1476 + }, + { + "epoch": 0.09983777206975801, + "grad_norm": 1.7319127321243286, + "learning_rate": 2.966402337206448e-05, + "loss": 0.152099609375, + "step": 1477 + }, + { + "epoch": 0.09990536704069217, + "grad_norm": 1.4097976684570312, + "learning_rate": 2.9663328584215294e-05, + "loss": 0.087158203125, + "step": 1478 + }, + { + "epoch": 0.09997296201162634, + "grad_norm": 0.9363474249839783, + "learning_rate": 2.9662633086863083e-05, + "loss": 0.14599609375, + "step": 1479 + }, + { + "epoch": 0.1000405569825605, + "grad_norm": 1.3278911113739014, + "learning_rate": 2.96619368800415e-05, + "loss": 0.2198486328125, + "step": 1480 + }, + { + "epoch": 0.10010815195349466, + "grad_norm": 0.9988247156143188, + "learning_rate": 2.966123996378422e-05, + "loss": 0.082275390625, + "step": 1481 + }, + { + "epoch": 0.10017574692442882, + "grad_norm": 1.8027234077453613, + "learning_rate": 2.966054233812498e-05, + "loss": 0.215545654296875, + "step": 1482 + }, + { + "epoch": 0.10024334189536299, + "grad_norm": 1.3553699254989624, + "learning_rate": 2.965984400309753e-05, + "loss": 0.18174362182617188, + "step": 1483 + }, + { + "epoch": 0.10031093686629715, + "grad_norm": 3.745307207107544, + "learning_rate": 2.9659144958735653e-05, + "loss": 0.339630126953125, + "step": 1484 + }, + { + "epoch": 0.1003785318372313, + "grad_norm": 0.5324168801307678, + "learning_rate": 2.9658445205073185e-05, + "loss": 0.136505126953125, + "step": 1485 + }, + { + "epoch": 0.10044612680816548, + "grad_norm": 1.0496572256088257, + "learning_rate": 2.9657744742143972e-05, + "loss": 0.18792724609375, + "step": 1486 + }, + { + "epoch": 0.10051372177909963, + "grad_norm": 1.997430682182312, + "learning_rate": 2.9657043569981916e-05, + "loss": 0.237548828125, + "step": 1487 + }, + { + "epoch": 0.1005813167500338, + "grad_norm": 0.32701849937438965, + "learning_rate": 2.9656341688620942e-05, + "loss": 0.06179046630859375, + "step": 1488 + }, + { + "epoch": 0.10064891172096796, + "grad_norm": 0.8604071140289307, + "learning_rate": 2.9655639098095012e-05, + "loss": 0.08642959594726562, + "step": 1489 + }, + { + "epoch": 0.10071650669190212, + "grad_norm": 1.1508784294128418, + "learning_rate": 2.9654935798438117e-05, + "loss": 0.1544952392578125, + "step": 1490 + }, + { + "epoch": 0.10078410166283629, + "grad_norm": 1.320467472076416, + "learning_rate": 2.965423178968429e-05, + "loss": 0.1699066162109375, + "step": 1491 + }, + { + "epoch": 0.10085169663377044, + "grad_norm": 1.5985039472579956, + "learning_rate": 2.965352707186759e-05, + "loss": 0.220458984375, + "step": 1492 + }, + { + "epoch": 0.10091929160470461, + "grad_norm": 1.3110584020614624, + "learning_rate": 2.9652821645022128e-05, + "loss": 0.258056640625, + "step": 1493 + }, + { + "epoch": 0.10098688657563877, + "grad_norm": 1.408644676208496, + "learning_rate": 2.9652115509182033e-05, + "loss": 0.25506591796875, + "step": 1494 + }, + { + "epoch": 0.10105448154657294, + "grad_norm": 0.8212028741836548, + "learning_rate": 2.965140866438146e-05, + "loss": 0.1473846435546875, + "step": 1495 + }, + { + "epoch": 0.1011220765175071, + "grad_norm": 0.9905939698219299, + "learning_rate": 2.9650701110654626e-05, + "loss": 0.1172943115234375, + "step": 1496 + }, + { + "epoch": 0.10118967148844125, + "grad_norm": 0.623012125492096, + "learning_rate": 2.9649992848035757e-05, + "loss": 0.09942626953125, + "step": 1497 + }, + { + "epoch": 0.10125726645937543, + "grad_norm": 0.6102086901664734, + "learning_rate": 2.9649283876559128e-05, + "loss": 0.091552734375, + "step": 1498 + }, + { + "epoch": 0.10132486143030958, + "grad_norm": 2.16328763961792, + "learning_rate": 2.9648574196259047e-05, + "loss": 0.14776611328125, + "step": 1499 + }, + { + "epoch": 0.10139245640124375, + "grad_norm": 0.7743080854415894, + "learning_rate": 2.964786380716984e-05, + "loss": 0.1121063232421875, + "step": 1500 + }, + { + "epoch": 0.10146005137217791, + "grad_norm": 0.7499315738677979, + "learning_rate": 2.964715270932589e-05, + "loss": 0.115966796875, + "step": 1501 + }, + { + "epoch": 0.10152764634311207, + "grad_norm": 0.5104019045829773, + "learning_rate": 2.9646440902761607e-05, + "loss": 0.09625244140625, + "step": 1502 + }, + { + "epoch": 0.10159524131404624, + "grad_norm": 1.780185580253601, + "learning_rate": 2.9645728387511424e-05, + "loss": 0.2028961181640625, + "step": 1503 + }, + { + "epoch": 0.10166283628498039, + "grad_norm": 1.8284224271774292, + "learning_rate": 2.964501516360982e-05, + "loss": 0.17572021484375, + "step": 1504 + }, + { + "epoch": 0.10173043125591456, + "grad_norm": 0.863304853439331, + "learning_rate": 2.9644301231091306e-05, + "loss": 0.0974884033203125, + "step": 1505 + }, + { + "epoch": 0.10179802622684872, + "grad_norm": 1.3404788970947266, + "learning_rate": 2.964358658999043e-05, + "loss": 0.214813232421875, + "step": 1506 + }, + { + "epoch": 0.10186562119778289, + "grad_norm": 0.7270538806915283, + "learning_rate": 2.9642871240341765e-05, + "loss": 0.13140869140625, + "step": 1507 + }, + { + "epoch": 0.10193321616871705, + "grad_norm": 0.7165600061416626, + "learning_rate": 2.9642155182179927e-05, + "loss": 0.12938690185546875, + "step": 1508 + }, + { + "epoch": 0.1020008111396512, + "grad_norm": 0.8492223024368286, + "learning_rate": 2.964143841553956e-05, + "loss": 0.1165771484375, + "step": 1509 + }, + { + "epoch": 0.10206840611058537, + "grad_norm": 0.5097042918205261, + "learning_rate": 2.964072094045535e-05, + "loss": 0.07598876953125, + "step": 1510 + }, + { + "epoch": 0.10213600108151953, + "grad_norm": 2.8597145080566406, + "learning_rate": 2.964000275696201e-05, + "loss": 0.16944122314453125, + "step": 1511 + }, + { + "epoch": 0.1022035960524537, + "grad_norm": 0.7870026230812073, + "learning_rate": 2.9639283865094298e-05, + "loss": 0.1987152099609375, + "step": 1512 + }, + { + "epoch": 0.10227119102338786, + "grad_norm": 0.5354544520378113, + "learning_rate": 2.9638564264886982e-05, + "loss": 0.0775299072265625, + "step": 1513 + }, + { + "epoch": 0.10233878599432203, + "grad_norm": 0.3878759741783142, + "learning_rate": 2.96378439563749e-05, + "loss": 0.019668102264404297, + "step": 1514 + }, + { + "epoch": 0.10240638096525619, + "grad_norm": 1.1065733432769775, + "learning_rate": 2.9637122939592887e-05, + "loss": 0.13916778564453125, + "step": 1515 + }, + { + "epoch": 0.10247397593619034, + "grad_norm": 2.3872087001800537, + "learning_rate": 2.9636401214575844e-05, + "loss": 0.28399658203125, + "step": 1516 + }, + { + "epoch": 0.10254157090712451, + "grad_norm": 0.9128104448318481, + "learning_rate": 2.963567878135869e-05, + "loss": 0.17572021484375, + "step": 1517 + }, + { + "epoch": 0.10260916587805867, + "grad_norm": 1.422849416732788, + "learning_rate": 2.9634955639976374e-05, + "loss": 0.29180908203125, + "step": 1518 + }, + { + "epoch": 0.10267676084899284, + "grad_norm": 1.2375391721725464, + "learning_rate": 2.963423179046389e-05, + "loss": 0.222076416015625, + "step": 1519 + }, + { + "epoch": 0.102744355819927, + "grad_norm": 2.6395959854125977, + "learning_rate": 2.9633507232856265e-05, + "loss": 0.2198944091796875, + "step": 1520 + }, + { + "epoch": 0.10281195079086115, + "grad_norm": 2.6795527935028076, + "learning_rate": 2.9632781967188552e-05, + "loss": 0.3001708984375, + "step": 1521 + }, + { + "epoch": 0.10287954576179532, + "grad_norm": 0.6272007822990417, + "learning_rate": 2.9632055993495848e-05, + "loss": 0.1222381591796875, + "step": 1522 + }, + { + "epoch": 0.10294714073272948, + "grad_norm": 0.3115271031856537, + "learning_rate": 2.963132931181328e-05, + "loss": 0.04117584228515625, + "step": 1523 + }, + { + "epoch": 0.10301473570366365, + "grad_norm": 1.1700912714004517, + "learning_rate": 2.963060192217601e-05, + "loss": 0.1560821533203125, + "step": 1524 + }, + { + "epoch": 0.10308233067459781, + "grad_norm": 0.8523319363594055, + "learning_rate": 2.962987382461923e-05, + "loss": 0.1313629150390625, + "step": 1525 + }, + { + "epoch": 0.10314992564553198, + "grad_norm": 0.9985120296478271, + "learning_rate": 2.9629145019178175e-05, + "loss": 0.1184844970703125, + "step": 1526 + }, + { + "epoch": 0.10321752061646614, + "grad_norm": 0.9906893968582153, + "learning_rate": 2.9628415505888104e-05, + "loss": 0.1381378173828125, + "step": 1527 + }, + { + "epoch": 0.10328511558740029, + "grad_norm": 0.3492896854877472, + "learning_rate": 2.962768528478432e-05, + "loss": 0.046566009521484375, + "step": 1528 + }, + { + "epoch": 0.10335271055833446, + "grad_norm": 1.0758682489395142, + "learning_rate": 2.962695435590215e-05, + "loss": 0.1524658203125, + "step": 1529 + }, + { + "epoch": 0.10342030552926862, + "grad_norm": 4.267544269561768, + "learning_rate": 2.9626222719276965e-05, + "loss": 0.26739501953125, + "step": 1530 + }, + { + "epoch": 0.10348790050020279, + "grad_norm": 1.4057221412658691, + "learning_rate": 2.9625490374944168e-05, + "loss": 0.204986572265625, + "step": 1531 + }, + { + "epoch": 0.10355549547113695, + "grad_norm": 1.6836905479431152, + "learning_rate": 2.962475732293919e-05, + "loss": 0.19477081298828125, + "step": 1532 + }, + { + "epoch": 0.1036230904420711, + "grad_norm": 0.4322238564491272, + "learning_rate": 2.9624023563297502e-05, + "loss": 0.035877227783203125, + "step": 1533 + }, + { + "epoch": 0.10369068541300527, + "grad_norm": 0.325101763010025, + "learning_rate": 2.962328909605461e-05, + "loss": 0.04883575439453125, + "step": 1534 + }, + { + "epoch": 0.10375828038393943, + "grad_norm": 1.146054744720459, + "learning_rate": 2.962255392124605e-05, + "loss": 0.1493682861328125, + "step": 1535 + }, + { + "epoch": 0.1038258753548736, + "grad_norm": 3.780669927597046, + "learning_rate": 2.962181803890739e-05, + "loss": 0.2452392578125, + "step": 1536 + }, + { + "epoch": 0.10389347032580776, + "grad_norm": 1.3548940420150757, + "learning_rate": 2.9621081449074247e-05, + "loss": 0.1806488037109375, + "step": 1537 + }, + { + "epoch": 0.10396106529674193, + "grad_norm": 2.2716472148895264, + "learning_rate": 2.962034415178225e-05, + "loss": 0.17867279052734375, + "step": 1538 + }, + { + "epoch": 0.10402866026767608, + "grad_norm": 0.6523269414901733, + "learning_rate": 2.9619606147067085e-05, + "loss": 0.077056884765625, + "step": 1539 + }, + { + "epoch": 0.10409625523861024, + "grad_norm": 0.8139375448226929, + "learning_rate": 2.9618867434964454e-05, + "loss": 0.07373046875, + "step": 1540 + }, + { + "epoch": 0.10416385020954441, + "grad_norm": 1.3533917665481567, + "learning_rate": 2.9618128015510105e-05, + "loss": 0.119354248046875, + "step": 1541 + }, + { + "epoch": 0.10423144518047857, + "grad_norm": 1.5050865411758423, + "learning_rate": 2.961738788873981e-05, + "loss": 0.1769256591796875, + "step": 1542 + }, + { + "epoch": 0.10429904015141274, + "grad_norm": 0.71701979637146, + "learning_rate": 2.961664705468939e-05, + "loss": 0.150665283203125, + "step": 1543 + }, + { + "epoch": 0.1043666351223469, + "grad_norm": 2.456022024154663, + "learning_rate": 2.961590551339468e-05, + "loss": 0.1866455078125, + "step": 1544 + }, + { + "epoch": 0.10443423009328107, + "grad_norm": 0.7205162048339844, + "learning_rate": 2.961516326489157e-05, + "loss": 0.13162994384765625, + "step": 1545 + }, + { + "epoch": 0.10450182506421522, + "grad_norm": 0.6541319489479065, + "learning_rate": 2.9614420309215968e-05, + "loss": 0.061004638671875, + "step": 1546 + }, + { + "epoch": 0.10456942003514938, + "grad_norm": 0.804447591304779, + "learning_rate": 2.9613676646403824e-05, + "loss": 0.1016693115234375, + "step": 1547 + }, + { + "epoch": 0.10463701500608355, + "grad_norm": 0.8233230113983154, + "learning_rate": 2.9612932276491124e-05, + "loss": 0.2037353515625, + "step": 1548 + }, + { + "epoch": 0.1047046099770177, + "grad_norm": 0.9141924381256104, + "learning_rate": 2.9612187199513883e-05, + "loss": 0.1316986083984375, + "step": 1549 + }, + { + "epoch": 0.10477220494795188, + "grad_norm": 0.5727037191390991, + "learning_rate": 2.9611441415508154e-05, + "loss": 0.05170440673828125, + "step": 1550 + }, + { + "epoch": 0.10483979991888603, + "grad_norm": 1.685557246208191, + "learning_rate": 2.961069492451002e-05, + "loss": 0.1998291015625, + "step": 1551 + }, + { + "epoch": 0.10490739488982019, + "grad_norm": 0.8837487101554871, + "learning_rate": 2.9609947726555603e-05, + "loss": 0.0715484619140625, + "step": 1552 + }, + { + "epoch": 0.10497498986075436, + "grad_norm": 1.1050331592559814, + "learning_rate": 2.960919982168106e-05, + "loss": 0.100616455078125, + "step": 1553 + }, + { + "epoch": 0.10504258483168852, + "grad_norm": 0.6147032380104065, + "learning_rate": 2.9608451209922576e-05, + "loss": 0.0587310791015625, + "step": 1554 + }, + { + "epoch": 0.10511017980262269, + "grad_norm": 0.45209139585494995, + "learning_rate": 2.960770189131637e-05, + "loss": 0.07195281982421875, + "step": 1555 + }, + { + "epoch": 0.10517777477355685, + "grad_norm": 0.4045514166355133, + "learning_rate": 2.9606951865898704e-05, + "loss": 0.0612030029296875, + "step": 1556 + }, + { + "epoch": 0.10524536974449102, + "grad_norm": 0.7220398783683777, + "learning_rate": 2.9606201133705866e-05, + "loss": 0.03900909423828125, + "step": 1557 + }, + { + "epoch": 0.10531296471542517, + "grad_norm": 1.257582426071167, + "learning_rate": 2.9605449694774184e-05, + "loss": 0.169281005859375, + "step": 1558 + }, + { + "epoch": 0.10538055968635933, + "grad_norm": 0.6432327032089233, + "learning_rate": 2.9604697549140015e-05, + "loss": 0.1328277587890625, + "step": 1559 + }, + { + "epoch": 0.1054481546572935, + "grad_norm": 0.24789145588874817, + "learning_rate": 2.9603944696839745e-05, + "loss": 0.040771484375, + "step": 1560 + }, + { + "epoch": 0.10551574962822766, + "grad_norm": 1.5373164415359497, + "learning_rate": 2.9603191137909822e-05, + "loss": 0.192474365234375, + "step": 1561 + }, + { + "epoch": 0.10558334459916183, + "grad_norm": 1.3282711505889893, + "learning_rate": 2.9602436872386688e-05, + "loss": 0.16521453857421875, + "step": 1562 + }, + { + "epoch": 0.10565093957009598, + "grad_norm": 1.137825846672058, + "learning_rate": 2.9601681900306847e-05, + "loss": 0.18035888671875, + "step": 1563 + }, + { + "epoch": 0.10571853454103015, + "grad_norm": 0.56137615442276, + "learning_rate": 2.9600926221706833e-05, + "loss": 0.08345413208007812, + "step": 1564 + }, + { + "epoch": 0.10578612951196431, + "grad_norm": 1.1542223691940308, + "learning_rate": 2.9600169836623204e-05, + "loss": 0.15088653564453125, + "step": 1565 + }, + { + "epoch": 0.10585372448289847, + "grad_norm": 0.6012085676193237, + "learning_rate": 2.959941274509256e-05, + "loss": 0.12237548828125, + "step": 1566 + }, + { + "epoch": 0.10592131945383264, + "grad_norm": 2.947068452835083, + "learning_rate": 2.9598654947151536e-05, + "loss": 0.235198974609375, + "step": 1567 + }, + { + "epoch": 0.1059889144247668, + "grad_norm": 0.6114075183868408, + "learning_rate": 2.9597896442836793e-05, + "loss": 0.055904388427734375, + "step": 1568 + }, + { + "epoch": 0.10605650939570097, + "grad_norm": 0.8120265007019043, + "learning_rate": 2.9597137232185042e-05, + "loss": 0.125396728515625, + "step": 1569 + }, + { + "epoch": 0.10612410436663512, + "grad_norm": 0.6305440068244934, + "learning_rate": 2.959637731523301e-05, + "loss": 0.09037017822265625, + "step": 1570 + }, + { + "epoch": 0.10619169933756928, + "grad_norm": 0.8467254638671875, + "learning_rate": 2.959561669201747e-05, + "loss": 0.1254425048828125, + "step": 1571 + }, + { + "epoch": 0.10625929430850345, + "grad_norm": 1.3710675239562988, + "learning_rate": 2.959485536257523e-05, + "loss": 0.144134521484375, + "step": 1572 + }, + { + "epoch": 0.1063268892794376, + "grad_norm": 2.3804140090942383, + "learning_rate": 2.959409332694312e-05, + "loss": 0.1904754638671875, + "step": 1573 + }, + { + "epoch": 0.10639448425037178, + "grad_norm": 1.6134828329086304, + "learning_rate": 2.9593330585158013e-05, + "loss": 0.10774421691894531, + "step": 1574 + }, + { + "epoch": 0.10646207922130593, + "grad_norm": 3.4847939014434814, + "learning_rate": 2.959256713725682e-05, + "loss": 0.261962890625, + "step": 1575 + }, + { + "epoch": 0.1065296741922401, + "grad_norm": 0.6772037744522095, + "learning_rate": 2.9591802983276478e-05, + "loss": 0.1622314453125, + "step": 1576 + }, + { + "epoch": 0.10659726916317426, + "grad_norm": 0.824802815914154, + "learning_rate": 2.959103812325396e-05, + "loss": 0.09199905395507812, + "step": 1577 + }, + { + "epoch": 0.10666486413410842, + "grad_norm": 0.7315614819526672, + "learning_rate": 2.959027255722628e-05, + "loss": 0.14557647705078125, + "step": 1578 + }, + { + "epoch": 0.10673245910504259, + "grad_norm": 0.4075874090194702, + "learning_rate": 2.958950628523048e-05, + "loss": 0.08248138427734375, + "step": 1579 + }, + { + "epoch": 0.10680005407597674, + "grad_norm": 0.7800294756889343, + "learning_rate": 2.9588739307303628e-05, + "loss": 0.15085601806640625, + "step": 1580 + }, + { + "epoch": 0.10686764904691091, + "grad_norm": 1.0667626857757568, + "learning_rate": 2.9587971623482842e-05, + "loss": 0.187713623046875, + "step": 1581 + }, + { + "epoch": 0.10693524401784507, + "grad_norm": 1.1196682453155518, + "learning_rate": 2.958720323380527e-05, + "loss": 0.2015838623046875, + "step": 1582 + }, + { + "epoch": 0.10700283898877923, + "grad_norm": 1.3506975173950195, + "learning_rate": 2.958643413830809e-05, + "loss": 0.2223663330078125, + "step": 1583 + }, + { + "epoch": 0.1070704339597134, + "grad_norm": 1.4134306907653809, + "learning_rate": 2.9585664337028516e-05, + "loss": 0.1819305419921875, + "step": 1584 + }, + { + "epoch": 0.10713802893064756, + "grad_norm": 1.6710461378097534, + "learning_rate": 2.9584893830003793e-05, + "loss": 0.3060302734375, + "step": 1585 + }, + { + "epoch": 0.10720562390158173, + "grad_norm": 0.6544693112373352, + "learning_rate": 2.9584122617271202e-05, + "loss": 0.08295440673828125, + "step": 1586 + }, + { + "epoch": 0.10727321887251588, + "grad_norm": 0.779772937297821, + "learning_rate": 2.958335069886806e-05, + "loss": 0.09198760986328125, + "step": 1587 + }, + { + "epoch": 0.10734081384345005, + "grad_norm": 1.8343219757080078, + "learning_rate": 2.9582578074831722e-05, + "loss": 0.1245880126953125, + "step": 1588 + }, + { + "epoch": 0.10740840881438421, + "grad_norm": 1.8314108848571777, + "learning_rate": 2.9581804745199566e-05, + "loss": 0.2261962890625, + "step": 1589 + }, + { + "epoch": 0.10747600378531837, + "grad_norm": 0.6464987397193909, + "learning_rate": 2.9581030710009012e-05, + "loss": 0.05986785888671875, + "step": 1590 + }, + { + "epoch": 0.10754359875625254, + "grad_norm": 1.3867608308792114, + "learning_rate": 2.9580255969297517e-05, + "loss": 0.2218017578125, + "step": 1591 + }, + { + "epoch": 0.1076111937271867, + "grad_norm": 1.4008893966674805, + "learning_rate": 2.9579480523102563e-05, + "loss": 0.20361328125, + "step": 1592 + }, + { + "epoch": 0.10767878869812086, + "grad_norm": 1.3388290405273438, + "learning_rate": 2.957870437146167e-05, + "loss": 0.1721019744873047, + "step": 1593 + }, + { + "epoch": 0.10774638366905502, + "grad_norm": 0.5519244074821472, + "learning_rate": 2.9577927514412403e-05, + "loss": 0.06903457641601562, + "step": 1594 + }, + { + "epoch": 0.10781397863998919, + "grad_norm": 1.7856333255767822, + "learning_rate": 2.9577149951992336e-05, + "loss": 0.24066162109375, + "step": 1595 + }, + { + "epoch": 0.10788157361092335, + "grad_norm": 1.601577877998352, + "learning_rate": 2.9576371684239104e-05, + "loss": 0.184600830078125, + "step": 1596 + }, + { + "epoch": 0.1079491685818575, + "grad_norm": 1.0128607749938965, + "learning_rate": 2.957559271119036e-05, + "loss": 0.077117919921875, + "step": 1597 + }, + { + "epoch": 0.10801676355279168, + "grad_norm": 1.5312604904174805, + "learning_rate": 2.9574813032883793e-05, + "loss": 0.12512969970703125, + "step": 1598 + }, + { + "epoch": 0.10808435852372583, + "grad_norm": 0.6220760345458984, + "learning_rate": 2.9574032649357134e-05, + "loss": 0.07994842529296875, + "step": 1599 + }, + { + "epoch": 0.10815195349466, + "grad_norm": 0.702788233757019, + "learning_rate": 2.957325156064814e-05, + "loss": 0.1015625, + "step": 1600 + }, + { + "epoch": 0.10821954846559416, + "grad_norm": 1.2727707624435425, + "learning_rate": 2.9572469766794604e-05, + "loss": 0.1771392822265625, + "step": 1601 + }, + { + "epoch": 0.10828714343652832, + "grad_norm": 0.32492414116859436, + "learning_rate": 2.957168726783436e-05, + "loss": 0.06540679931640625, + "step": 1602 + }, + { + "epoch": 0.10835473840746249, + "grad_norm": 1.7608318328857422, + "learning_rate": 2.9570904063805263e-05, + "loss": 0.29034423828125, + "step": 1603 + }, + { + "epoch": 0.10842233337839664, + "grad_norm": 1.7165700197219849, + "learning_rate": 2.9570120154745216e-05, + "loss": 0.1383819580078125, + "step": 1604 + }, + { + "epoch": 0.10848992834933081, + "grad_norm": 2.4756252765655518, + "learning_rate": 2.956933554069214e-05, + "loss": 0.164520263671875, + "step": 1605 + }, + { + "epoch": 0.10855752332026497, + "grad_norm": 0.8704918622970581, + "learning_rate": 2.9568550221684003e-05, + "loss": 0.13409423828125, + "step": 1606 + }, + { + "epoch": 0.10862511829119914, + "grad_norm": 1.374854564666748, + "learning_rate": 2.9567764197758808e-05, + "loss": 0.12374114990234375, + "step": 1607 + }, + { + "epoch": 0.1086927132621333, + "grad_norm": 0.28748321533203125, + "learning_rate": 2.9566977468954584e-05, + "loss": 0.03464508056640625, + "step": 1608 + }, + { + "epoch": 0.10876030823306745, + "grad_norm": 1.3254690170288086, + "learning_rate": 2.9566190035309403e-05, + "loss": 0.17315673828125, + "step": 1609 + }, + { + "epoch": 0.10882790320400162, + "grad_norm": 0.9803223609924316, + "learning_rate": 2.956540189686136e-05, + "loss": 0.14691162109375, + "step": 1610 + }, + { + "epoch": 0.10889549817493578, + "grad_norm": 0.33851391077041626, + "learning_rate": 2.956461305364859e-05, + "loss": 0.0728759765625, + "step": 1611 + }, + { + "epoch": 0.10896309314586995, + "grad_norm": 1.1346914768218994, + "learning_rate": 2.9563823505709267e-05, + "loss": 0.1182708740234375, + "step": 1612 + }, + { + "epoch": 0.10903068811680411, + "grad_norm": 0.7527435421943665, + "learning_rate": 2.956303325308159e-05, + "loss": 0.065948486328125, + "step": 1613 + }, + { + "epoch": 0.10909828308773828, + "grad_norm": 0.8733569383621216, + "learning_rate": 2.9562242295803795e-05, + "loss": 0.12635040283203125, + "step": 1614 + }, + { + "epoch": 0.10916587805867244, + "grad_norm": 0.560681164264679, + "learning_rate": 2.956145063391416e-05, + "loss": 0.056598663330078125, + "step": 1615 + }, + { + "epoch": 0.10923347302960659, + "grad_norm": 1.3389796018600464, + "learning_rate": 2.9560658267450987e-05, + "loss": 0.22613525390625, + "step": 1616 + }, + { + "epoch": 0.10930106800054076, + "grad_norm": 1.9246550798416138, + "learning_rate": 2.955986519645261e-05, + "loss": 0.20379638671875, + "step": 1617 + }, + { + "epoch": 0.10936866297147492, + "grad_norm": 1.4557033777236938, + "learning_rate": 2.9559071420957415e-05, + "loss": 0.176300048828125, + "step": 1618 + }, + { + "epoch": 0.10943625794240909, + "grad_norm": 0.6871220469474792, + "learning_rate": 2.9558276941003798e-05, + "loss": 0.0767059326171875, + "step": 1619 + }, + { + "epoch": 0.10950385291334325, + "grad_norm": 0.8438217043876648, + "learning_rate": 2.955748175663021e-05, + "loss": 0.194488525390625, + "step": 1620 + }, + { + "epoch": 0.1095714478842774, + "grad_norm": 0.905535101890564, + "learning_rate": 2.9556685867875118e-05, + "loss": 0.08667755126953125, + "step": 1621 + }, + { + "epoch": 0.10963904285521157, + "grad_norm": 0.8168148398399353, + "learning_rate": 2.955588927477704e-05, + "loss": 0.1110687255859375, + "step": 1622 + }, + { + "epoch": 0.10970663782614573, + "grad_norm": 0.5655999779701233, + "learning_rate": 2.9555091977374512e-05, + "loss": 0.06294631958007812, + "step": 1623 + }, + { + "epoch": 0.1097742327970799, + "grad_norm": 2.729858160018921, + "learning_rate": 2.9554293975706122e-05, + "loss": 0.2840576171875, + "step": 1624 + }, + { + "epoch": 0.10984182776801406, + "grad_norm": 1.0307098627090454, + "learning_rate": 2.9553495269810474e-05, + "loss": 0.16619873046875, + "step": 1625 + }, + { + "epoch": 0.10990942273894823, + "grad_norm": 1.2054272890090942, + "learning_rate": 2.955269585972622e-05, + "loss": 0.14990234375, + "step": 1626 + }, + { + "epoch": 0.10997701770988239, + "grad_norm": 0.4412432014942169, + "learning_rate": 2.9551895745492037e-05, + "loss": 0.056095123291015625, + "step": 1627 + }, + { + "epoch": 0.11004461268081654, + "grad_norm": 0.636306881904602, + "learning_rate": 2.955109492714664e-05, + "loss": 0.0522308349609375, + "step": 1628 + }, + { + "epoch": 0.11011220765175071, + "grad_norm": 0.5357932448387146, + "learning_rate": 2.9550293404728776e-05, + "loss": 0.05878448486328125, + "step": 1629 + }, + { + "epoch": 0.11017980262268487, + "grad_norm": 0.8416469693183899, + "learning_rate": 2.9549491178277234e-05, + "loss": 0.08137702941894531, + "step": 1630 + }, + { + "epoch": 0.11024739759361904, + "grad_norm": 2.4999539852142334, + "learning_rate": 2.9548688247830823e-05, + "loss": 0.1876373291015625, + "step": 1631 + }, + { + "epoch": 0.1103149925645532, + "grad_norm": 0.4924003481864929, + "learning_rate": 2.95478846134284e-05, + "loss": 0.03505706787109375, + "step": 1632 + }, + { + "epoch": 0.11038258753548735, + "grad_norm": 1.319171667098999, + "learning_rate": 2.954708027510885e-05, + "loss": 0.16344451904296875, + "step": 1633 + }, + { + "epoch": 0.11045018250642152, + "grad_norm": 1.136021375656128, + "learning_rate": 2.9546275232911082e-05, + "loss": 0.1952362060546875, + "step": 1634 + }, + { + "epoch": 0.11051777747735568, + "grad_norm": 0.6717979907989502, + "learning_rate": 2.9545469486874063e-05, + "loss": 0.1422119140625, + "step": 1635 + }, + { + "epoch": 0.11058537244828985, + "grad_norm": 0.5190969705581665, + "learning_rate": 2.9544663037036767e-05, + "loss": 0.07768630981445312, + "step": 1636 + }, + { + "epoch": 0.11065296741922401, + "grad_norm": 2.647998809814453, + "learning_rate": 2.9543855883438224e-05, + "loss": 0.249725341796875, + "step": 1637 + }, + { + "epoch": 0.11072056239015818, + "grad_norm": 0.5065568685531616, + "learning_rate": 2.9543048026117485e-05, + "loss": 0.07447052001953125, + "step": 1638 + }, + { + "epoch": 0.11078815736109233, + "grad_norm": 1.3660777807235718, + "learning_rate": 2.9542239465113638e-05, + "loss": 0.171966552734375, + "step": 1639 + }, + { + "epoch": 0.11085575233202649, + "grad_norm": 1.3288284540176392, + "learning_rate": 2.954143020046581e-05, + "loss": 0.21072006225585938, + "step": 1640 + }, + { + "epoch": 0.11092334730296066, + "grad_norm": 0.7618396878242493, + "learning_rate": 2.954062023221316e-05, + "loss": 0.0826416015625, + "step": 1641 + }, + { + "epoch": 0.11099094227389482, + "grad_norm": 1.492110252380371, + "learning_rate": 2.9539809560394873e-05, + "loss": 0.1367645263671875, + "step": 1642 + }, + { + "epoch": 0.11105853724482899, + "grad_norm": 0.3496204614639282, + "learning_rate": 2.9538998185050177e-05, + "loss": 0.073333740234375, + "step": 1643 + }, + { + "epoch": 0.11112613221576315, + "grad_norm": 0.9271671175956726, + "learning_rate": 2.9538186106218334e-05, + "loss": 0.177947998046875, + "step": 1644 + }, + { + "epoch": 0.11119372718669732, + "grad_norm": 0.42987048625946045, + "learning_rate": 2.9537373323938636e-05, + "loss": 0.08692169189453125, + "step": 1645 + }, + { + "epoch": 0.11126132215763147, + "grad_norm": 1.4602943658828735, + "learning_rate": 2.9536559838250407e-05, + "loss": 0.206573486328125, + "step": 1646 + }, + { + "epoch": 0.11132891712856563, + "grad_norm": 0.9716178774833679, + "learning_rate": 2.953574564919301e-05, + "loss": 0.1529693603515625, + "step": 1647 + }, + { + "epoch": 0.1113965120994998, + "grad_norm": 1.5655170679092407, + "learning_rate": 2.9534930756805845e-05, + "loss": 0.194122314453125, + "step": 1648 + }, + { + "epoch": 0.11146410707043396, + "grad_norm": 0.5948817729949951, + "learning_rate": 2.9534115161128337e-05, + "loss": 0.11159515380859375, + "step": 1649 + }, + { + "epoch": 0.11153170204136813, + "grad_norm": 1.8061072826385498, + "learning_rate": 2.9533298862199948e-05, + "loss": 0.14099502563476562, + "step": 1650 + }, + { + "epoch": 0.11159929701230228, + "grad_norm": 0.8925454616546631, + "learning_rate": 2.953248186006018e-05, + "loss": 0.1713409423828125, + "step": 1651 + }, + { + "epoch": 0.11166689198323644, + "grad_norm": 1.4116212129592896, + "learning_rate": 2.9531664154748564e-05, + "loss": 0.160675048828125, + "step": 1652 + }, + { + "epoch": 0.11173448695417061, + "grad_norm": 0.9395633935928345, + "learning_rate": 2.9530845746304665e-05, + "loss": 0.0858001708984375, + "step": 1653 + }, + { + "epoch": 0.11180208192510477, + "grad_norm": 0.6229743957519531, + "learning_rate": 2.9530026634768084e-05, + "loss": 0.13820648193359375, + "step": 1654 + }, + { + "epoch": 0.11186967689603894, + "grad_norm": 0.561579167842865, + "learning_rate": 2.952920682017845e-05, + "loss": 0.1087188720703125, + "step": 1655 + }, + { + "epoch": 0.1119372718669731, + "grad_norm": 0.7601852416992188, + "learning_rate": 2.9528386302575434e-05, + "loss": 0.11382293701171875, + "step": 1656 + }, + { + "epoch": 0.11200486683790727, + "grad_norm": 0.5892922282218933, + "learning_rate": 2.9527565081998738e-05, + "loss": 0.09698486328125, + "step": 1657 + }, + { + "epoch": 0.11207246180884142, + "grad_norm": 1.144029140472412, + "learning_rate": 2.95267431584881e-05, + "loss": 0.15777587890625, + "step": 1658 + }, + { + "epoch": 0.11214005677977558, + "grad_norm": 1.1850782632827759, + "learning_rate": 2.9525920532083285e-05, + "loss": 0.1273345947265625, + "step": 1659 + }, + { + "epoch": 0.11220765175070975, + "grad_norm": 0.5820687413215637, + "learning_rate": 2.95250972028241e-05, + "loss": 0.1095428466796875, + "step": 1660 + }, + { + "epoch": 0.1122752467216439, + "grad_norm": 0.8630566596984863, + "learning_rate": 2.952427317075038e-05, + "loss": 0.14176177978515625, + "step": 1661 + }, + { + "epoch": 0.11234284169257808, + "grad_norm": 0.8937469124794006, + "learning_rate": 2.9523448435902e-05, + "loss": 0.08069610595703125, + "step": 1662 + }, + { + "epoch": 0.11241043666351223, + "grad_norm": 0.7905822992324829, + "learning_rate": 2.9522622998318864e-05, + "loss": 0.088714599609375, + "step": 1663 + }, + { + "epoch": 0.11247803163444639, + "grad_norm": 2.692896604537964, + "learning_rate": 2.952179685804091e-05, + "loss": 0.2086181640625, + "step": 1664 + }, + { + "epoch": 0.11254562660538056, + "grad_norm": 0.6122433543205261, + "learning_rate": 2.952097001510812e-05, + "loss": 0.124542236328125, + "step": 1665 + }, + { + "epoch": 0.11261322157631472, + "grad_norm": 0.4530867040157318, + "learning_rate": 2.952014246956049e-05, + "loss": 0.08182525634765625, + "step": 1666 + }, + { + "epoch": 0.11268081654724889, + "grad_norm": 0.631808876991272, + "learning_rate": 2.9519314221438067e-05, + "loss": 0.11429595947265625, + "step": 1667 + }, + { + "epoch": 0.11274841151818304, + "grad_norm": 0.6999130845069885, + "learning_rate": 2.951848527078093e-05, + "loss": 0.1148529052734375, + "step": 1668 + }, + { + "epoch": 0.11281600648911722, + "grad_norm": 1.1315709352493286, + "learning_rate": 2.9517655617629185e-05, + "loss": 0.153717041015625, + "step": 1669 + }, + { + "epoch": 0.11288360146005137, + "grad_norm": 1.070884346961975, + "learning_rate": 2.9516825262022974e-05, + "loss": 0.11041259765625, + "step": 1670 + }, + { + "epoch": 0.11295119643098553, + "grad_norm": 0.4891761839389801, + "learning_rate": 2.9515994204002485e-05, + "loss": 0.084075927734375, + "step": 1671 + }, + { + "epoch": 0.1130187914019197, + "grad_norm": 0.24250289797782898, + "learning_rate": 2.9515162443607917e-05, + "loss": 0.0291290283203125, + "step": 1672 + }, + { + "epoch": 0.11308638637285386, + "grad_norm": 1.6215720176696777, + "learning_rate": 2.951432998087952e-05, + "loss": 0.2017822265625, + "step": 1673 + }, + { + "epoch": 0.11315398134378803, + "grad_norm": 1.3287206888198853, + "learning_rate": 2.951349681585758e-05, + "loss": 0.1359710693359375, + "step": 1674 + }, + { + "epoch": 0.11322157631472218, + "grad_norm": 1.6241270303726196, + "learning_rate": 2.95126629485824e-05, + "loss": 0.227752685546875, + "step": 1675 + }, + { + "epoch": 0.11328917128565635, + "grad_norm": 0.7366423010826111, + "learning_rate": 2.951182837909434e-05, + "loss": 0.1280517578125, + "step": 1676 + }, + { + "epoch": 0.11335676625659051, + "grad_norm": 2.2498481273651123, + "learning_rate": 2.951099310743377e-05, + "loss": 0.271453857421875, + "step": 1677 + }, + { + "epoch": 0.11342436122752467, + "grad_norm": 1.5974253416061401, + "learning_rate": 2.9510157133641114e-05, + "loss": 0.1588134765625, + "step": 1678 + }, + { + "epoch": 0.11349195619845884, + "grad_norm": 0.9766235947608948, + "learning_rate": 2.9509320457756812e-05, + "loss": 0.1363677978515625, + "step": 1679 + }, + { + "epoch": 0.113559551169393, + "grad_norm": 1.057425856590271, + "learning_rate": 2.950848307982136e-05, + "loss": 0.181427001953125, + "step": 1680 + }, + { + "epoch": 0.11362714614032716, + "grad_norm": 0.6370296478271484, + "learning_rate": 2.9507644999875264e-05, + "loss": 0.156005859375, + "step": 1681 + }, + { + "epoch": 0.11369474111126132, + "grad_norm": 0.34297412633895874, + "learning_rate": 2.9506806217959087e-05, + "loss": 0.03452873229980469, + "step": 1682 + }, + { + "epoch": 0.11376233608219548, + "grad_norm": 1.320840835571289, + "learning_rate": 2.95059667341134e-05, + "loss": 0.127685546875, + "step": 1683 + }, + { + "epoch": 0.11382993105312965, + "grad_norm": 1.1431907415390015, + "learning_rate": 2.9505126548378838e-05, + "loss": 0.2349853515625, + "step": 1684 + }, + { + "epoch": 0.1138975260240638, + "grad_norm": 0.8858899474143982, + "learning_rate": 2.9504285660796047e-05, + "loss": 0.073883056640625, + "step": 1685 + }, + { + "epoch": 0.11396512099499798, + "grad_norm": 1.7702937126159668, + "learning_rate": 2.950344407140571e-05, + "loss": 0.1690521240234375, + "step": 1686 + }, + { + "epoch": 0.11403271596593213, + "grad_norm": 0.6392465829849243, + "learning_rate": 2.9502601780248557e-05, + "loss": 0.0525665283203125, + "step": 1687 + }, + { + "epoch": 0.1141003109368663, + "grad_norm": 1.154543161392212, + "learning_rate": 2.9501758787365335e-05, + "loss": 0.169097900390625, + "step": 1688 + }, + { + "epoch": 0.11416790590780046, + "grad_norm": 0.5849068760871887, + "learning_rate": 2.9500915092796842e-05, + "loss": 0.10846710205078125, + "step": 1689 + }, + { + "epoch": 0.11423550087873462, + "grad_norm": 0.6870198249816895, + "learning_rate": 2.9500070696583897e-05, + "loss": 0.13022613525390625, + "step": 1690 + }, + { + "epoch": 0.11430309584966879, + "grad_norm": 0.9034292101860046, + "learning_rate": 2.9499225598767352e-05, + "loss": 0.0888671875, + "step": 1691 + }, + { + "epoch": 0.11437069082060294, + "grad_norm": 1.9634087085723877, + "learning_rate": 2.9498379799388104e-05, + "loss": 0.2384033203125, + "step": 1692 + }, + { + "epoch": 0.11443828579153711, + "grad_norm": 0.40519869327545166, + "learning_rate": 2.949753329848708e-05, + "loss": 0.055633544921875, + "step": 1693 + }, + { + "epoch": 0.11450588076247127, + "grad_norm": 0.5526342988014221, + "learning_rate": 2.9496686096105232e-05, + "loss": 0.131256103515625, + "step": 1694 + }, + { + "epoch": 0.11457347573340544, + "grad_norm": 0.3127427399158478, + "learning_rate": 2.949583819228356e-05, + "loss": 0.036396026611328125, + "step": 1695 + }, + { + "epoch": 0.1146410707043396, + "grad_norm": 1.8028607368469238, + "learning_rate": 2.9494989587063087e-05, + "loss": 0.18414306640625, + "step": 1696 + }, + { + "epoch": 0.11470866567527375, + "grad_norm": 1.2938990592956543, + "learning_rate": 2.9494140280484873e-05, + "loss": 0.139129638671875, + "step": 1697 + }, + { + "epoch": 0.11477626064620793, + "grad_norm": 1.4090338945388794, + "learning_rate": 2.9493290272590016e-05, + "loss": 0.14900970458984375, + "step": 1698 + }, + { + "epoch": 0.11484385561714208, + "grad_norm": 1.0437383651733398, + "learning_rate": 2.949243956341964e-05, + "loss": 0.10234832763671875, + "step": 1699 + }, + { + "epoch": 0.11491145058807625, + "grad_norm": 1.71880304813385, + "learning_rate": 2.9491588153014914e-05, + "loss": 0.203460693359375, + "step": 1700 + }, + { + "epoch": 0.11497904555901041, + "grad_norm": 2.7383532524108887, + "learning_rate": 2.9490736041417025e-05, + "loss": 0.14781570434570312, + "step": 1701 + }, + { + "epoch": 0.11504664052994457, + "grad_norm": 1.8797266483306885, + "learning_rate": 2.9489883228667214e-05, + "loss": 0.196868896484375, + "step": 1702 + }, + { + "epoch": 0.11511423550087874, + "grad_norm": 0.30146902799606323, + "learning_rate": 2.948902971480674e-05, + "loss": 0.054592132568359375, + "step": 1703 + }, + { + "epoch": 0.1151818304718129, + "grad_norm": 2.1825783252716064, + "learning_rate": 2.9488175499876902e-05, + "loss": 0.20188140869140625, + "step": 1704 + }, + { + "epoch": 0.11524942544274706, + "grad_norm": 1.0952033996582031, + "learning_rate": 2.9487320583919035e-05, + "loss": 0.17498779296875, + "step": 1705 + }, + { + "epoch": 0.11531702041368122, + "grad_norm": 0.9217065572738647, + "learning_rate": 2.94864649669745e-05, + "loss": 0.13089370727539062, + "step": 1706 + }, + { + "epoch": 0.11538461538461539, + "grad_norm": 0.48326075077056885, + "learning_rate": 2.9485608649084698e-05, + "loss": 0.05255889892578125, + "step": 1707 + }, + { + "epoch": 0.11545221035554955, + "grad_norm": 0.20226719975471497, + "learning_rate": 2.9484751630291065e-05, + "loss": 0.038623809814453125, + "step": 1708 + }, + { + "epoch": 0.1155198053264837, + "grad_norm": 1.2896637916564941, + "learning_rate": 2.948389391063507e-05, + "loss": 0.1443634033203125, + "step": 1709 + }, + { + "epoch": 0.11558740029741787, + "grad_norm": 0.7103646993637085, + "learning_rate": 2.9483035490158215e-05, + "loss": 0.0498809814453125, + "step": 1710 + }, + { + "epoch": 0.11565499526835203, + "grad_norm": 3.1910064220428467, + "learning_rate": 2.9482176368902027e-05, + "loss": 0.192962646484375, + "step": 1711 + }, + { + "epoch": 0.1157225902392862, + "grad_norm": 0.618211030960083, + "learning_rate": 2.9481316546908092e-05, + "loss": 0.06884002685546875, + "step": 1712 + }, + { + "epoch": 0.11579018521022036, + "grad_norm": 0.8020932078361511, + "learning_rate": 2.9480456024218e-05, + "loss": 0.10601806640625, + "step": 1713 + }, + { + "epoch": 0.11585778018115452, + "grad_norm": 0.6691611409187317, + "learning_rate": 2.947959480087339e-05, + "loss": 0.1078338623046875, + "step": 1714 + }, + { + "epoch": 0.11592537515208869, + "grad_norm": 0.303322970867157, + "learning_rate": 2.9478732876915935e-05, + "loss": 0.049957275390625, + "step": 1715 + }, + { + "epoch": 0.11599297012302284, + "grad_norm": 0.7376531958580017, + "learning_rate": 2.9477870252387343e-05, + "loss": 0.08958053588867188, + "step": 1716 + }, + { + "epoch": 0.11606056509395701, + "grad_norm": 1.1302233934402466, + "learning_rate": 2.9477006927329355e-05, + "loss": 0.13396453857421875, + "step": 1717 + }, + { + "epoch": 0.11612816006489117, + "grad_norm": 0.6182753443717957, + "learning_rate": 2.947614290178374e-05, + "loss": 0.07940673828125, + "step": 1718 + }, + { + "epoch": 0.11619575503582534, + "grad_norm": 0.3887995183467865, + "learning_rate": 2.94752781757923e-05, + "loss": 0.03955650329589844, + "step": 1719 + }, + { + "epoch": 0.1162633500067595, + "grad_norm": 1.2569564580917358, + "learning_rate": 2.9474412749396882e-05, + "loss": 0.1032257080078125, + "step": 1720 + }, + { + "epoch": 0.11633094497769365, + "grad_norm": 0.15980063378810883, + "learning_rate": 2.9473546622639365e-05, + "loss": 0.030645370483398438, + "step": 1721 + }, + { + "epoch": 0.11639853994862782, + "grad_norm": 1.6150436401367188, + "learning_rate": 2.9472679795561648e-05, + "loss": 0.15875244140625, + "step": 1722 + }, + { + "epoch": 0.11646613491956198, + "grad_norm": 0.9577053785324097, + "learning_rate": 2.9471812268205675e-05, + "loss": 0.148101806640625, + "step": 1723 + }, + { + "epoch": 0.11653372989049615, + "grad_norm": 0.755937397480011, + "learning_rate": 2.9470944040613434e-05, + "loss": 0.12864971160888672, + "step": 1724 + }, + { + "epoch": 0.11660132486143031, + "grad_norm": 0.6324185729026794, + "learning_rate": 2.947007511282692e-05, + "loss": 0.094390869140625, + "step": 1725 + }, + { + "epoch": 0.11666891983236448, + "grad_norm": 0.519436776638031, + "learning_rate": 2.9469205484888184e-05, + "loss": 0.1115264892578125, + "step": 1726 + }, + { + "epoch": 0.11673651480329864, + "grad_norm": 1.0799213647842407, + "learning_rate": 2.9468335156839307e-05, + "loss": 0.14251708984375, + "step": 1727 + }, + { + "epoch": 0.11680410977423279, + "grad_norm": 0.7380872368812561, + "learning_rate": 2.9467464128722394e-05, + "loss": 0.0791168212890625, + "step": 1728 + }, + { + "epoch": 0.11687170474516696, + "grad_norm": 0.8153037428855896, + "learning_rate": 2.94665924005796e-05, + "loss": 0.168212890625, + "step": 1729 + }, + { + "epoch": 0.11693929971610112, + "grad_norm": 1.00157630443573, + "learning_rate": 2.946571997245309e-05, + "loss": 0.1916351318359375, + "step": 1730 + }, + { + "epoch": 0.11700689468703529, + "grad_norm": 0.6516026258468628, + "learning_rate": 2.946484684438509e-05, + "loss": 0.1382904052734375, + "step": 1731 + }, + { + "epoch": 0.11707448965796945, + "grad_norm": 0.8698115944862366, + "learning_rate": 2.9463973016417844e-05, + "loss": 0.1680908203125, + "step": 1732 + }, + { + "epoch": 0.1171420846289036, + "grad_norm": 0.7778078317642212, + "learning_rate": 2.9463098488593632e-05, + "loss": 0.10610198974609375, + "step": 1733 + }, + { + "epoch": 0.11720967959983777, + "grad_norm": 0.9385477304458618, + "learning_rate": 2.9462223260954775e-05, + "loss": 0.129852294921875, + "step": 1734 + }, + { + "epoch": 0.11727727457077193, + "grad_norm": 3.284642457962036, + "learning_rate": 2.9461347333543615e-05, + "loss": 0.2412872314453125, + "step": 1735 + }, + { + "epoch": 0.1173448695417061, + "grad_norm": 1.6015939712524414, + "learning_rate": 2.9460470706402532e-05, + "loss": 0.21031951904296875, + "step": 1736 + }, + { + "epoch": 0.11741246451264026, + "grad_norm": 2.0407567024230957, + "learning_rate": 2.9459593379573952e-05, + "loss": 0.2018890380859375, + "step": 1737 + }, + { + "epoch": 0.11748005948357443, + "grad_norm": 1.385827660560608, + "learning_rate": 2.9458715353100318e-05, + "loss": 0.1772003173828125, + "step": 1738 + }, + { + "epoch": 0.11754765445450858, + "grad_norm": 1.693955659866333, + "learning_rate": 2.9457836627024118e-05, + "loss": 0.1560516357421875, + "step": 1739 + }, + { + "epoch": 0.11761524942544274, + "grad_norm": 0.7203987240791321, + "learning_rate": 2.9456957201387872e-05, + "loss": 0.050624847412109375, + "step": 1740 + }, + { + "epoch": 0.11768284439637691, + "grad_norm": 0.772426426410675, + "learning_rate": 2.945607707623413e-05, + "loss": 0.0723114013671875, + "step": 1741 + }, + { + "epoch": 0.11775043936731107, + "grad_norm": 0.8770118355751038, + "learning_rate": 2.9455196251605477e-05, + "loss": 0.0725250244140625, + "step": 1742 + }, + { + "epoch": 0.11781803433824524, + "grad_norm": 1.70114004611969, + "learning_rate": 2.9454314727544532e-05, + "loss": 0.254547119140625, + "step": 1743 + }, + { + "epoch": 0.1178856293091794, + "grad_norm": 0.6263175010681152, + "learning_rate": 2.9453432504093955e-05, + "loss": 0.145599365234375, + "step": 1744 + }, + { + "epoch": 0.11795322428011355, + "grad_norm": 0.8089709281921387, + "learning_rate": 2.945254958129642e-05, + "loss": 0.07483673095703125, + "step": 1745 + }, + { + "epoch": 0.11802081925104772, + "grad_norm": 1.0432684421539307, + "learning_rate": 2.9451665959194664e-05, + "loss": 0.1822662353515625, + "step": 1746 + }, + { + "epoch": 0.11808841422198188, + "grad_norm": 0.8857980370521545, + "learning_rate": 2.9450781637831433e-05, + "loss": 0.14032745361328125, + "step": 1747 + }, + { + "epoch": 0.11815600919291605, + "grad_norm": 0.8055933117866516, + "learning_rate": 2.9449896617249514e-05, + "loss": 0.1640472412109375, + "step": 1748 + }, + { + "epoch": 0.11822360416385021, + "grad_norm": 1.0462051630020142, + "learning_rate": 2.944901089749174e-05, + "loss": 0.05240631103515625, + "step": 1749 + }, + { + "epoch": 0.11829119913478438, + "grad_norm": 1.2073792219161987, + "learning_rate": 2.944812447860096e-05, + "loss": 0.2398681640625, + "step": 1750 + }, + { + "epoch": 0.11835879410571853, + "grad_norm": 0.8302114605903625, + "learning_rate": 2.9447237360620062e-05, + "loss": 0.1295623779296875, + "step": 1751 + }, + { + "epoch": 0.11842638907665269, + "grad_norm": 1.7276053428649902, + "learning_rate": 2.9446349543591976e-05, + "loss": 0.1850433349609375, + "step": 1752 + }, + { + "epoch": 0.11849398404758686, + "grad_norm": 0.7345486879348755, + "learning_rate": 2.9445461027559658e-05, + "loss": 0.15740966796875, + "step": 1753 + }, + { + "epoch": 0.11856157901852102, + "grad_norm": 1.4695756435394287, + "learning_rate": 2.94445718125661e-05, + "loss": 0.1927947998046875, + "step": 1754 + }, + { + "epoch": 0.11862917398945519, + "grad_norm": 1.5897259712219238, + "learning_rate": 2.9443681898654332e-05, + "loss": 0.17327880859375, + "step": 1755 + }, + { + "epoch": 0.11869676896038935, + "grad_norm": 0.5142993330955505, + "learning_rate": 2.9442791285867407e-05, + "loss": 0.0718994140625, + "step": 1756 + }, + { + "epoch": 0.11876436393132352, + "grad_norm": 1.2564897537231445, + "learning_rate": 2.9441899974248413e-05, + "loss": 0.2230072021484375, + "step": 1757 + }, + { + "epoch": 0.11883195890225767, + "grad_norm": 1.126986026763916, + "learning_rate": 2.9441007963840496e-05, + "loss": 0.157867431640625, + "step": 1758 + }, + { + "epoch": 0.11889955387319183, + "grad_norm": 1.6591876745224, + "learning_rate": 2.94401152546868e-05, + "loss": 0.19256591796875, + "step": 1759 + }, + { + "epoch": 0.118967148844126, + "grad_norm": 0.32176217436790466, + "learning_rate": 2.943922184683053e-05, + "loss": 0.022874832153320312, + "step": 1760 + }, + { + "epoch": 0.11903474381506016, + "grad_norm": 0.44558149576187134, + "learning_rate": 2.9438327740314907e-05, + "loss": 0.06652069091796875, + "step": 1761 + }, + { + "epoch": 0.11910233878599433, + "grad_norm": 0.9895987510681152, + "learning_rate": 2.94374329351832e-05, + "loss": 0.17327880859375, + "step": 1762 + }, + { + "epoch": 0.11916993375692848, + "grad_norm": 1.4823338985443115, + "learning_rate": 2.9436537431478695e-05, + "loss": 0.13440704345703125, + "step": 1763 + }, + { + "epoch": 0.11923752872786264, + "grad_norm": 0.6034421324729919, + "learning_rate": 2.943564122924474e-05, + "loss": 0.1157379150390625, + "step": 1764 + }, + { + "epoch": 0.11930512369879681, + "grad_norm": 1.3060141801834106, + "learning_rate": 2.9434744328524678e-05, + "loss": 0.1268768310546875, + "step": 1765 + }, + { + "epoch": 0.11937271866973097, + "grad_norm": 1.7148597240447998, + "learning_rate": 2.9433846729361923e-05, + "loss": 0.22808837890625, + "step": 1766 + }, + { + "epoch": 0.11944031364066514, + "grad_norm": 1.3258147239685059, + "learning_rate": 2.94329484317999e-05, + "loss": 0.183868408203125, + "step": 1767 + }, + { + "epoch": 0.1195079086115993, + "grad_norm": 1.9502452611923218, + "learning_rate": 2.943204943588207e-05, + "loss": 0.2013092041015625, + "step": 1768 + }, + { + "epoch": 0.11957550358253347, + "grad_norm": 1.2561495304107666, + "learning_rate": 2.9431149741651943e-05, + "loss": 0.119140625, + "step": 1769 + }, + { + "epoch": 0.11964309855346762, + "grad_norm": 1.4713863134384155, + "learning_rate": 2.9430249349153034e-05, + "loss": 0.2569580078125, + "step": 1770 + }, + { + "epoch": 0.11971069352440178, + "grad_norm": 1.591068148612976, + "learning_rate": 2.9429348258428933e-05, + "loss": 0.29205322265625, + "step": 1771 + }, + { + "epoch": 0.11977828849533595, + "grad_norm": 2.778146505355835, + "learning_rate": 2.9428446469523224e-05, + "loss": 0.245330810546875, + "step": 1772 + }, + { + "epoch": 0.1198458834662701, + "grad_norm": 1.6651017665863037, + "learning_rate": 2.942754398247954e-05, + "loss": 0.2469482421875, + "step": 1773 + }, + { + "epoch": 0.11991347843720428, + "grad_norm": 0.6103789210319519, + "learning_rate": 2.942664079734156e-05, + "loss": 0.05950927734375, + "step": 1774 + }, + { + "epoch": 0.11998107340813843, + "grad_norm": 0.4145642817020416, + "learning_rate": 2.9425736914152977e-05, + "loss": 0.0590667724609375, + "step": 1775 + }, + { + "epoch": 0.1200486683790726, + "grad_norm": 0.5521085858345032, + "learning_rate": 2.9424832332957532e-05, + "loss": 0.057849884033203125, + "step": 1776 + }, + { + "epoch": 0.12011626335000676, + "grad_norm": 0.5350801944732666, + "learning_rate": 2.942392705379899e-05, + "loss": 0.12943267822265625, + "step": 1777 + }, + { + "epoch": 0.12018385832094092, + "grad_norm": 1.9689244031906128, + "learning_rate": 2.9423021076721155e-05, + "loss": 0.225921630859375, + "step": 1778 + }, + { + "epoch": 0.12025145329187509, + "grad_norm": 0.35977303981781006, + "learning_rate": 2.942211440176787e-05, + "loss": 0.041233062744140625, + "step": 1779 + }, + { + "epoch": 0.12031904826280924, + "grad_norm": 1.0390013456344604, + "learning_rate": 2.9421207028982992e-05, + "loss": 0.188568115234375, + "step": 1780 + }, + { + "epoch": 0.12038664323374342, + "grad_norm": 1.3091728687286377, + "learning_rate": 2.942029895841044e-05, + "loss": 0.1949462890625, + "step": 1781 + }, + { + "epoch": 0.12045423820467757, + "grad_norm": 0.23875384032726288, + "learning_rate": 2.941939019009414e-05, + "loss": 0.04004669189453125, + "step": 1782 + }, + { + "epoch": 0.12052183317561173, + "grad_norm": 0.770535945892334, + "learning_rate": 2.941848072407807e-05, + "loss": 0.0937042236328125, + "step": 1783 + }, + { + "epoch": 0.1205894281465459, + "grad_norm": 0.6207149028778076, + "learning_rate": 2.941757056040624e-05, + "loss": 0.060489654541015625, + "step": 1784 + }, + { + "epoch": 0.12065702311748006, + "grad_norm": 1.2213640213012695, + "learning_rate": 2.9416659699122682e-05, + "loss": 0.11440277099609375, + "step": 1785 + }, + { + "epoch": 0.12072461808841423, + "grad_norm": 0.4594615697860718, + "learning_rate": 2.9415748140271466e-05, + "loss": 0.104339599609375, + "step": 1786 + }, + { + "epoch": 0.12079221305934838, + "grad_norm": 0.3006437122821808, + "learning_rate": 2.9414835883896713e-05, + "loss": 0.0341644287109375, + "step": 1787 + }, + { + "epoch": 0.12085980803028255, + "grad_norm": 0.9243778586387634, + "learning_rate": 2.941392293004255e-05, + "loss": 0.12845611572265625, + "step": 1788 + }, + { + "epoch": 0.12092740300121671, + "grad_norm": 1.1046245098114014, + "learning_rate": 2.9413009278753158e-05, + "loss": 0.2579345703125, + "step": 1789 + }, + { + "epoch": 0.12099499797215087, + "grad_norm": 0.6572809815406799, + "learning_rate": 2.941209493007274e-05, + "loss": 0.14605712890625, + "step": 1790 + }, + { + "epoch": 0.12106259294308504, + "grad_norm": 0.6897707581520081, + "learning_rate": 2.9411179884045546e-05, + "loss": 0.0536651611328125, + "step": 1791 + }, + { + "epoch": 0.1211301879140192, + "grad_norm": 0.3020719289779663, + "learning_rate": 2.9410264140715846e-05, + "loss": 0.04400634765625, + "step": 1792 + }, + { + "epoch": 0.12119778288495336, + "grad_norm": 0.6939516663551331, + "learning_rate": 2.940934770012795e-05, + "loss": 0.1325531005859375, + "step": 1793 + }, + { + "epoch": 0.12126537785588752, + "grad_norm": 1.6605864763259888, + "learning_rate": 2.94084305623262e-05, + "loss": 0.1665496826171875, + "step": 1794 + }, + { + "epoch": 0.12133297282682168, + "grad_norm": 0.8662204146385193, + "learning_rate": 2.940751272735498e-05, + "loss": 0.058246612548828125, + "step": 1795 + }, + { + "epoch": 0.12140056779775585, + "grad_norm": 1.0379302501678467, + "learning_rate": 2.940659419525869e-05, + "loss": 0.08545684814453125, + "step": 1796 + }, + { + "epoch": 0.12146816276869, + "grad_norm": 0.47196438908576965, + "learning_rate": 2.940567496608178e-05, + "loss": 0.06847381591796875, + "step": 1797 + }, + { + "epoch": 0.12153575773962418, + "grad_norm": 0.4576782286167145, + "learning_rate": 2.9404755039868726e-05, + "loss": 0.0730438232421875, + "step": 1798 + }, + { + "epoch": 0.12160335271055833, + "grad_norm": 0.4590739607810974, + "learning_rate": 2.9403834416664042e-05, + "loss": 0.06568527221679688, + "step": 1799 + }, + { + "epoch": 0.1216709476814925, + "grad_norm": 2.156602621078491, + "learning_rate": 2.9402913096512274e-05, + "loss": 0.174957275390625, + "step": 1800 + }, + { + "epoch": 0.12173854265242666, + "grad_norm": 0.8015769124031067, + "learning_rate": 2.9401991079457995e-05, + "loss": 0.1382598876953125, + "step": 1801 + }, + { + "epoch": 0.12180613762336082, + "grad_norm": 0.2575283646583557, + "learning_rate": 2.9401068365545828e-05, + "loss": 0.040287017822265625, + "step": 1802 + }, + { + "epoch": 0.12187373259429499, + "grad_norm": 0.3709827959537506, + "learning_rate": 2.9400144954820414e-05, + "loss": 0.050693511962890625, + "step": 1803 + }, + { + "epoch": 0.12194132756522914, + "grad_norm": 2.6175763607025146, + "learning_rate": 2.9399220847326427e-05, + "loss": 0.2213287353515625, + "step": 1804 + }, + { + "epoch": 0.12200892253616331, + "grad_norm": 2.2242369651794434, + "learning_rate": 2.9398296043108595e-05, + "loss": 0.2454833984375, + "step": 1805 + }, + { + "epoch": 0.12207651750709747, + "grad_norm": 1.542055368423462, + "learning_rate": 2.9397370542211653e-05, + "loss": 0.21343994140625, + "step": 1806 + }, + { + "epoch": 0.12214411247803164, + "grad_norm": 0.9264904856681824, + "learning_rate": 2.9396444344680392e-05, + "loss": 0.14113903045654297, + "step": 1807 + }, + { + "epoch": 0.1222117074489658, + "grad_norm": 1.2974929809570312, + "learning_rate": 2.939551745055962e-05, + "loss": 0.11896133422851562, + "step": 1808 + }, + { + "epoch": 0.12227930241989995, + "grad_norm": 0.6014177203178406, + "learning_rate": 2.939458985989419e-05, + "loss": 0.110687255859375, + "step": 1809 + }, + { + "epoch": 0.12234689739083413, + "grad_norm": 1.0726392269134521, + "learning_rate": 2.9393661572728987e-05, + "loss": 0.087860107421875, + "step": 1810 + }, + { + "epoch": 0.12241449236176828, + "grad_norm": 1.3604390621185303, + "learning_rate": 2.9392732589108916e-05, + "loss": 0.1597900390625, + "step": 1811 + }, + { + "epoch": 0.12248208733270245, + "grad_norm": 0.11384807527065277, + "learning_rate": 2.9391802909078945e-05, + "loss": 0.019430160522460938, + "step": 1812 + }, + { + "epoch": 0.12254968230363661, + "grad_norm": 0.4962538778781891, + "learning_rate": 2.939087253268404e-05, + "loss": 0.1197357177734375, + "step": 1813 + }, + { + "epoch": 0.12261727727457077, + "grad_norm": 0.9364336729049683, + "learning_rate": 2.9389941459969228e-05, + "loss": 0.13935089111328125, + "step": 1814 + }, + { + "epoch": 0.12268487224550494, + "grad_norm": 1.1481441259384155, + "learning_rate": 2.9389009690979562e-05, + "loss": 0.1466846466064453, + "step": 1815 + }, + { + "epoch": 0.12275246721643909, + "grad_norm": 0.7098854780197144, + "learning_rate": 2.9388077225760116e-05, + "loss": 0.09423828125, + "step": 1816 + }, + { + "epoch": 0.12282006218737326, + "grad_norm": 1.848840594291687, + "learning_rate": 2.938714406435602e-05, + "loss": 0.26654052734375, + "step": 1817 + }, + { + "epoch": 0.12288765715830742, + "grad_norm": 1.9868718385696411, + "learning_rate": 2.938621020681242e-05, + "loss": 0.2186279296875, + "step": 1818 + }, + { + "epoch": 0.12295525212924159, + "grad_norm": 0.4407539367675781, + "learning_rate": 2.93852756531745e-05, + "loss": 0.06201934814453125, + "step": 1819 + }, + { + "epoch": 0.12302284710017575, + "grad_norm": 1.0941531658172607, + "learning_rate": 2.938434040348749e-05, + "loss": 0.10410308837890625, + "step": 1820 + }, + { + "epoch": 0.1230904420711099, + "grad_norm": 0.9741116166114807, + "learning_rate": 2.938340445779663e-05, + "loss": 0.178924560546875, + "step": 1821 + }, + { + "epoch": 0.12315803704204407, + "grad_norm": 0.9675801396369934, + "learning_rate": 2.9382467816147217e-05, + "loss": 0.1488037109375, + "step": 1822 + }, + { + "epoch": 0.12322563201297823, + "grad_norm": 0.6449275612831116, + "learning_rate": 2.9381530478584564e-05, + "loss": 0.1159210205078125, + "step": 1823 + }, + { + "epoch": 0.1232932269839124, + "grad_norm": 1.0107247829437256, + "learning_rate": 2.938059244515403e-05, + "loss": 0.1905517578125, + "step": 1824 + }, + { + "epoch": 0.12336082195484656, + "grad_norm": 2.0948398113250732, + "learning_rate": 2.9379653715901003e-05, + "loss": 0.19219970703125, + "step": 1825 + }, + { + "epoch": 0.12342841692578072, + "grad_norm": 0.5792005658149719, + "learning_rate": 2.93787142908709e-05, + "loss": 0.0910186767578125, + "step": 1826 + }, + { + "epoch": 0.12349601189671489, + "grad_norm": 0.2881152629852295, + "learning_rate": 2.937777417010918e-05, + "loss": 0.046367645263671875, + "step": 1827 + }, + { + "epoch": 0.12356360686764904, + "grad_norm": 0.707108736038208, + "learning_rate": 2.9376833353661334e-05, + "loss": 0.1081695556640625, + "step": 1828 + }, + { + "epoch": 0.12363120183858321, + "grad_norm": 0.42362573742866516, + "learning_rate": 2.937589184157288e-05, + "loss": 0.1079559326171875, + "step": 1829 + }, + { + "epoch": 0.12369879680951737, + "grad_norm": 0.24744562804698944, + "learning_rate": 2.937494963388938e-05, + "loss": 0.03812408447265625, + "step": 1830 + }, + { + "epoch": 0.12376639178045154, + "grad_norm": 1.7444881200790405, + "learning_rate": 2.9374006730656414e-05, + "loss": 0.13690185546875, + "step": 1831 + }, + { + "epoch": 0.1238339867513857, + "grad_norm": 1.316046953201294, + "learning_rate": 2.9373063131919615e-05, + "loss": 0.1316070556640625, + "step": 1832 + }, + { + "epoch": 0.12390158172231985, + "grad_norm": 0.8658691644668579, + "learning_rate": 2.9372118837724633e-05, + "loss": 0.09961700439453125, + "step": 1833 + }, + { + "epoch": 0.12396917669325402, + "grad_norm": 1.0466127395629883, + "learning_rate": 2.9371173848117167e-05, + "loss": 0.128509521484375, + "step": 1834 + }, + { + "epoch": 0.12403677166418818, + "grad_norm": 0.9357268810272217, + "learning_rate": 2.9370228163142935e-05, + "loss": 0.17779541015625, + "step": 1835 + }, + { + "epoch": 0.12410436663512235, + "grad_norm": 0.9564542174339294, + "learning_rate": 2.9369281782847696e-05, + "loss": 0.07474517822265625, + "step": 1836 + }, + { + "epoch": 0.12417196160605651, + "grad_norm": 1.6495757102966309, + "learning_rate": 2.9368334707277245e-05, + "loss": 0.26617431640625, + "step": 1837 + }, + { + "epoch": 0.12423955657699068, + "grad_norm": 0.6601228713989258, + "learning_rate": 2.93673869364774e-05, + "loss": 0.0839080810546875, + "step": 1838 + }, + { + "epoch": 0.12430715154792484, + "grad_norm": 0.5518696308135986, + "learning_rate": 2.936643847049403e-05, + "loss": 0.05499267578125, + "step": 1839 + }, + { + "epoch": 0.12437474651885899, + "grad_norm": 0.878733217716217, + "learning_rate": 2.936548930937302e-05, + "loss": 0.1455078125, + "step": 1840 + }, + { + "epoch": 0.12444234148979316, + "grad_norm": 1.0069079399108887, + "learning_rate": 2.93645394531603e-05, + "loss": 0.14391326904296875, + "step": 1841 + }, + { + "epoch": 0.12450993646072732, + "grad_norm": 0.70826655626297, + "learning_rate": 2.9363588901901832e-05, + "loss": 0.06739044189453125, + "step": 1842 + }, + { + "epoch": 0.12457753143166149, + "grad_norm": 0.19889198243618011, + "learning_rate": 2.9362637655643603e-05, + "loss": 0.032817840576171875, + "step": 1843 + }, + { + "epoch": 0.12464512640259565, + "grad_norm": 0.34950336813926697, + "learning_rate": 2.9361685714431646e-05, + "loss": 0.0619964599609375, + "step": 1844 + }, + { + "epoch": 0.1247127213735298, + "grad_norm": 0.6410509943962097, + "learning_rate": 2.936073307831202e-05, + "loss": 0.13184356689453125, + "step": 1845 + }, + { + "epoch": 0.12478031634446397, + "grad_norm": 0.5464912056922913, + "learning_rate": 2.9359779747330815e-05, + "loss": 0.04984283447265625, + "step": 1846 + }, + { + "epoch": 0.12484791131539813, + "grad_norm": 0.5522761940956116, + "learning_rate": 2.9358825721534167e-05, + "loss": 0.0348968505859375, + "step": 1847 + }, + { + "epoch": 0.1249155062863323, + "grad_norm": 0.5998727679252625, + "learning_rate": 2.9357871000968236e-05, + "loss": 0.08196258544921875, + "step": 1848 + }, + { + "epoch": 0.12498310125726646, + "grad_norm": 2.013110637664795, + "learning_rate": 2.935691558567921e-05, + "loss": 0.15042495727539062, + "step": 1849 + }, + { + "epoch": 0.12505069622820061, + "grad_norm": 0.6623067259788513, + "learning_rate": 2.9355959475713327e-05, + "loss": 0.08745574951171875, + "step": 1850 + }, + { + "epoch": 0.12511829119913478, + "grad_norm": 0.35393643379211426, + "learning_rate": 2.9355002671116847e-05, + "loss": 0.046627044677734375, + "step": 1851 + }, + { + "epoch": 0.12518588617006896, + "grad_norm": 0.8304163217544556, + "learning_rate": 2.9354045171936064e-05, + "loss": 0.0963287353515625, + "step": 1852 + }, + { + "epoch": 0.1252534811410031, + "grad_norm": 1.3529093265533447, + "learning_rate": 2.9353086978217307e-05, + "loss": 0.169647216796875, + "step": 1853 + }, + { + "epoch": 0.12532107611193727, + "grad_norm": 0.9918988943099976, + "learning_rate": 2.935212809000694e-05, + "loss": 0.191925048828125, + "step": 1854 + }, + { + "epoch": 0.12538867108287144, + "grad_norm": 0.8264647722244263, + "learning_rate": 2.9351168507351363e-05, + "loss": 0.12081336975097656, + "step": 1855 + }, + { + "epoch": 0.1254562660538056, + "grad_norm": 0.471866637468338, + "learning_rate": 2.9350208230297004e-05, + "loss": 0.06020355224609375, + "step": 1856 + }, + { + "epoch": 0.12552386102473975, + "grad_norm": 1.000571370124817, + "learning_rate": 2.9349247258890327e-05, + "loss": 0.206329345703125, + "step": 1857 + }, + { + "epoch": 0.12559145599567392, + "grad_norm": 0.4387305974960327, + "learning_rate": 2.9348285593177834e-05, + "loss": 0.0625457763671875, + "step": 1858 + }, + { + "epoch": 0.1256590509666081, + "grad_norm": 0.6401410698890686, + "learning_rate": 2.9347323233206052e-05, + "loss": 0.108612060546875, + "step": 1859 + }, + { + "epoch": 0.12572664593754224, + "grad_norm": 1.3345394134521484, + "learning_rate": 2.9346360179021546e-05, + "loss": 0.25555419921875, + "step": 1860 + }, + { + "epoch": 0.1257942409084764, + "grad_norm": 1.395276665687561, + "learning_rate": 2.9345396430670918e-05, + "loss": 0.1341094970703125, + "step": 1861 + }, + { + "epoch": 0.12586183587941058, + "grad_norm": 0.2964954972267151, + "learning_rate": 2.934443198820079e-05, + "loss": 0.03911590576171875, + "step": 1862 + }, + { + "epoch": 0.12592943085034475, + "grad_norm": 0.8307514786720276, + "learning_rate": 2.9343466851657844e-05, + "loss": 0.109100341796875, + "step": 1863 + }, + { + "epoch": 0.1259970258212789, + "grad_norm": 1.061018705368042, + "learning_rate": 2.9342501021088764e-05, + "loss": 0.209991455078125, + "step": 1864 + }, + { + "epoch": 0.12606462079221306, + "grad_norm": 0.5985649824142456, + "learning_rate": 2.9341534496540295e-05, + "loss": 0.14797210693359375, + "step": 1865 + }, + { + "epoch": 0.12613221576314723, + "grad_norm": 5.723206996917725, + "learning_rate": 2.9340567278059193e-05, + "loss": 0.2610015869140625, + "step": 1866 + }, + { + "epoch": 0.12619981073408137, + "grad_norm": 0.6415953636169434, + "learning_rate": 2.9339599365692265e-05, + "loss": 0.145965576171875, + "step": 1867 + }, + { + "epoch": 0.12626740570501555, + "grad_norm": 1.2391083240509033, + "learning_rate": 2.9338630759486345e-05, + "loss": 0.2264404296875, + "step": 1868 + }, + { + "epoch": 0.12633500067594972, + "grad_norm": 1.8224118947982788, + "learning_rate": 2.9337661459488298e-05, + "loss": 0.23577880859375, + "step": 1869 + }, + { + "epoch": 0.12640259564688386, + "grad_norm": 0.776350736618042, + "learning_rate": 2.9336691465745022e-05, + "loss": 0.16778564453125, + "step": 1870 + }, + { + "epoch": 0.12647019061781803, + "grad_norm": 0.443887323141098, + "learning_rate": 2.9335720778303453e-05, + "loss": 0.0755767822265625, + "step": 1871 + }, + { + "epoch": 0.1265377855887522, + "grad_norm": 0.22366495430469513, + "learning_rate": 2.933474939721056e-05, + "loss": 0.03452301025390625, + "step": 1872 + }, + { + "epoch": 0.12660538055968637, + "grad_norm": 0.9720996618270874, + "learning_rate": 2.933377732251335e-05, + "loss": 0.169281005859375, + "step": 1873 + }, + { + "epoch": 0.1266729755306205, + "grad_norm": 1.0381245613098145, + "learning_rate": 2.9332804554258847e-05, + "loss": 0.153533935546875, + "step": 1874 + }, + { + "epoch": 0.12674057050155468, + "grad_norm": 0.3482329845428467, + "learning_rate": 2.9331831092494125e-05, + "loss": 0.06681060791015625, + "step": 1875 + }, + { + "epoch": 0.12680816547248885, + "grad_norm": 0.864368736743927, + "learning_rate": 2.9330856937266287e-05, + "loss": 0.09912872314453125, + "step": 1876 + }, + { + "epoch": 0.126875760443423, + "grad_norm": 0.5554970502853394, + "learning_rate": 2.9329882088622468e-05, + "loss": 0.0769500732421875, + "step": 1877 + }, + { + "epoch": 0.12694335541435717, + "grad_norm": 0.7908527851104736, + "learning_rate": 2.9328906546609835e-05, + "loss": 0.0838165283203125, + "step": 1878 + }, + { + "epoch": 0.12701095038529134, + "grad_norm": 1.0505037307739258, + "learning_rate": 2.93279303112756e-05, + "loss": 0.195281982421875, + "step": 1879 + }, + { + "epoch": 0.1270785453562255, + "grad_norm": 0.6332255005836487, + "learning_rate": 2.9326953382666985e-05, + "loss": 0.1181640625, + "step": 1880 + }, + { + "epoch": 0.12714614032715965, + "grad_norm": 0.3908262550830841, + "learning_rate": 2.932597576083127e-05, + "loss": 0.08156204223632812, + "step": 1881 + }, + { + "epoch": 0.12721373529809382, + "grad_norm": 0.6747823357582092, + "learning_rate": 2.932499744581575e-05, + "loss": 0.14289093017578125, + "step": 1882 + }, + { + "epoch": 0.127281330269028, + "grad_norm": 1.254551887512207, + "learning_rate": 2.9324018437667773e-05, + "loss": 0.0931396484375, + "step": 1883 + }, + { + "epoch": 0.12734892523996214, + "grad_norm": 1.3008790016174316, + "learning_rate": 2.93230387364347e-05, + "loss": 0.142120361328125, + "step": 1884 + }, + { + "epoch": 0.1274165202108963, + "grad_norm": 1.0555068254470825, + "learning_rate": 2.9322058342163943e-05, + "loss": 0.12913894653320312, + "step": 1885 + }, + { + "epoch": 0.12748411518183048, + "grad_norm": 0.525194525718689, + "learning_rate": 2.9321077254902937e-05, + "loss": 0.09267425537109375, + "step": 1886 + }, + { + "epoch": 0.12755171015276465, + "grad_norm": 0.8314043879508972, + "learning_rate": 2.9320095474699145e-05, + "loss": 0.0641632080078125, + "step": 1887 + }, + { + "epoch": 0.1276193051236988, + "grad_norm": 1.6366829872131348, + "learning_rate": 2.9319113001600083e-05, + "loss": 0.18603515625, + "step": 1888 + }, + { + "epoch": 0.12768690009463296, + "grad_norm": 0.30477485060691833, + "learning_rate": 2.9318129835653286e-05, + "loss": 0.04788970947265625, + "step": 1889 + }, + { + "epoch": 0.12775449506556713, + "grad_norm": 0.8180034756660461, + "learning_rate": 2.9317145976906318e-05, + "loss": 0.06816482543945312, + "step": 1890 + }, + { + "epoch": 0.12782209003650127, + "grad_norm": 0.25459733605384827, + "learning_rate": 2.9316161425406793e-05, + "loss": 0.033901214599609375, + "step": 1891 + }, + { + "epoch": 0.12788968500743544, + "grad_norm": 1.0731102228164673, + "learning_rate": 2.9315176181202348e-05, + "loss": 0.16363525390625, + "step": 1892 + }, + { + "epoch": 0.12795727997836961, + "grad_norm": 3.0597963333129883, + "learning_rate": 2.931419024434065e-05, + "loss": 0.25592041015625, + "step": 1893 + }, + { + "epoch": 0.12802487494930379, + "grad_norm": 0.6562934517860413, + "learning_rate": 2.9313203614869416e-05, + "loss": 0.086456298828125, + "step": 1894 + }, + { + "epoch": 0.12809246992023793, + "grad_norm": 1.0679768323898315, + "learning_rate": 2.9312216292836374e-05, + "loss": 0.148773193359375, + "step": 1895 + }, + { + "epoch": 0.1281600648911721, + "grad_norm": 0.9205546975135803, + "learning_rate": 2.9311228278289302e-05, + "loss": 0.1486053466796875, + "step": 1896 + }, + { + "epoch": 0.12822765986210627, + "grad_norm": 2.239612340927124, + "learning_rate": 2.9310239571276007e-05, + "loss": 0.2178497314453125, + "step": 1897 + }, + { + "epoch": 0.1282952548330404, + "grad_norm": 1.0605287551879883, + "learning_rate": 2.9309250171844324e-05, + "loss": 0.0940093994140625, + "step": 1898 + }, + { + "epoch": 0.12836284980397458, + "grad_norm": 0.383685827255249, + "learning_rate": 2.930826008004213e-05, + "loss": 0.033573150634765625, + "step": 1899 + }, + { + "epoch": 0.12843044477490875, + "grad_norm": 1.1147569417953491, + "learning_rate": 2.9307269295917328e-05, + "loss": 0.1263275146484375, + "step": 1900 + }, + { + "epoch": 0.1284980397458429, + "grad_norm": 0.43807029724121094, + "learning_rate": 2.930627781951787e-05, + "loss": 0.05328369140625, + "step": 1901 + }, + { + "epoch": 0.12856563471677707, + "grad_norm": 0.9565463662147522, + "learning_rate": 2.930528565089172e-05, + "loss": 0.15096282958984375, + "step": 1902 + }, + { + "epoch": 0.12863322968771124, + "grad_norm": 1.1424665451049805, + "learning_rate": 2.930429279008688e-05, + "loss": 0.14226531982421875, + "step": 1903 + }, + { + "epoch": 0.1287008246586454, + "grad_norm": 0.634552538394928, + "learning_rate": 2.9303299237151402e-05, + "loss": 0.0973968505859375, + "step": 1904 + }, + { + "epoch": 0.12876841962957955, + "grad_norm": 0.5718618035316467, + "learning_rate": 2.9302304992133354e-05, + "loss": 0.093292236328125, + "step": 1905 + }, + { + "epoch": 0.12883601460051372, + "grad_norm": 0.7951143383979797, + "learning_rate": 2.9301310055080842e-05, + "loss": 0.061847686767578125, + "step": 1906 + }, + { + "epoch": 0.1289036095714479, + "grad_norm": 0.4982186555862427, + "learning_rate": 2.9300314426042014e-05, + "loss": 0.071441650390625, + "step": 1907 + }, + { + "epoch": 0.12897120454238203, + "grad_norm": 1.1944553852081299, + "learning_rate": 2.9299318105065044e-05, + "loss": 0.1309051513671875, + "step": 1908 + }, + { + "epoch": 0.1290387995133162, + "grad_norm": 1.8069267272949219, + "learning_rate": 2.9298321092198135e-05, + "loss": 0.1923065185546875, + "step": 1909 + }, + { + "epoch": 0.12910639448425038, + "grad_norm": 2.6109466552734375, + "learning_rate": 2.929732338748953e-05, + "loss": 0.254547119140625, + "step": 1910 + }, + { + "epoch": 0.12917398945518455, + "grad_norm": 0.8453229665756226, + "learning_rate": 2.9296324990987506e-05, + "loss": 0.10004425048828125, + "step": 1911 + }, + { + "epoch": 0.1292415844261187, + "grad_norm": 1.4463452100753784, + "learning_rate": 2.9295325902740372e-05, + "loss": 0.153900146484375, + "step": 1912 + }, + { + "epoch": 0.12930917939705286, + "grad_norm": 1.1193442344665527, + "learning_rate": 2.9294326122796473e-05, + "loss": 0.17242431640625, + "step": 1913 + }, + { + "epoch": 0.12937677436798703, + "grad_norm": 1.2414101362228394, + "learning_rate": 2.9293325651204176e-05, + "loss": 0.10996246337890625, + "step": 1914 + }, + { + "epoch": 0.12944436933892117, + "grad_norm": 0.8351535201072693, + "learning_rate": 2.9292324488011893e-05, + "loss": 0.15401458740234375, + "step": 1915 + }, + { + "epoch": 0.12951196430985534, + "grad_norm": 0.5789059996604919, + "learning_rate": 2.9291322633268075e-05, + "loss": 0.0576934814453125, + "step": 1916 + }, + { + "epoch": 0.1295795592807895, + "grad_norm": 0.4743656814098358, + "learning_rate": 2.9290320087021185e-05, + "loss": 0.09592628479003906, + "step": 1917 + }, + { + "epoch": 0.12964715425172368, + "grad_norm": 0.28449755907058716, + "learning_rate": 2.928931684931974e-05, + "loss": 0.06882476806640625, + "step": 1918 + }, + { + "epoch": 0.12971474922265783, + "grad_norm": 1.4375224113464355, + "learning_rate": 2.9288312920212282e-05, + "loss": 0.1796875, + "step": 1919 + }, + { + "epoch": 0.129782344193592, + "grad_norm": 0.3035885989665985, + "learning_rate": 2.9287308299747388e-05, + "loss": 0.0492706298828125, + "step": 1920 + }, + { + "epoch": 0.12984993916452617, + "grad_norm": 0.3691954016685486, + "learning_rate": 2.9286302987973663e-05, + "loss": 0.071014404296875, + "step": 1921 + }, + { + "epoch": 0.1299175341354603, + "grad_norm": 0.7449727654457092, + "learning_rate": 2.928529698493976e-05, + "loss": 0.11254119873046875, + "step": 1922 + }, + { + "epoch": 0.12998512910639448, + "grad_norm": 1.0012562274932861, + "learning_rate": 2.928429029069434e-05, + "loss": 0.1976165771484375, + "step": 1923 + }, + { + "epoch": 0.13005272407732865, + "grad_norm": 0.8369406461715698, + "learning_rate": 2.9283282905286133e-05, + "loss": 0.15814208984375, + "step": 1924 + }, + { + "epoch": 0.13012031904826282, + "grad_norm": 0.24091382324695587, + "learning_rate": 2.9282274828763868e-05, + "loss": 0.050037384033203125, + "step": 1925 + }, + { + "epoch": 0.13018791401919697, + "grad_norm": 0.48061269521713257, + "learning_rate": 2.928126606117633e-05, + "loss": 0.08640289306640625, + "step": 1926 + }, + { + "epoch": 0.13025550899013114, + "grad_norm": 0.473825603723526, + "learning_rate": 2.9280256602572318e-05, + "loss": 0.07256317138671875, + "step": 1927 + }, + { + "epoch": 0.1303231039610653, + "grad_norm": 0.49479416012763977, + "learning_rate": 2.9279246453000686e-05, + "loss": 0.0655975341796875, + "step": 1928 + }, + { + "epoch": 0.13039069893199945, + "grad_norm": 0.5387028455734253, + "learning_rate": 2.9278235612510317e-05, + "loss": 0.09235382080078125, + "step": 1929 + }, + { + "epoch": 0.13045829390293362, + "grad_norm": 0.9846755266189575, + "learning_rate": 2.9277224081150104e-05, + "loss": 0.1700439453125, + "step": 1930 + }, + { + "epoch": 0.1305258888738678, + "grad_norm": 0.45593079924583435, + "learning_rate": 2.9276211858969003e-05, + "loss": 0.0743865966796875, + "step": 1931 + }, + { + "epoch": 0.13059348384480193, + "grad_norm": 1.2615699768066406, + "learning_rate": 2.9275198946015993e-05, + "loss": 0.13202667236328125, + "step": 1932 + }, + { + "epoch": 0.1306610788157361, + "grad_norm": 1.046294093132019, + "learning_rate": 2.9274185342340076e-05, + "loss": 0.14209365844726562, + "step": 1933 + }, + { + "epoch": 0.13072867378667027, + "grad_norm": 1.2693381309509277, + "learning_rate": 2.9273171047990305e-05, + "loss": 0.14093780517578125, + "step": 1934 + }, + { + "epoch": 0.13079626875760444, + "grad_norm": 0.6327821612358093, + "learning_rate": 2.927215606301576e-05, + "loss": 0.078277587890625, + "step": 1935 + }, + { + "epoch": 0.1308638637285386, + "grad_norm": 0.5039681196212769, + "learning_rate": 2.927114038746554e-05, + "loss": 0.08428955078125, + "step": 1936 + }, + { + "epoch": 0.13093145869947276, + "grad_norm": 0.3157215118408203, + "learning_rate": 2.9270124021388797e-05, + "loss": 0.0473175048828125, + "step": 1937 + }, + { + "epoch": 0.13099905367040693, + "grad_norm": 0.4077053368091583, + "learning_rate": 2.9269106964834712e-05, + "loss": 0.0836944580078125, + "step": 1938 + }, + { + "epoch": 0.13106664864134107, + "grad_norm": 0.43788832426071167, + "learning_rate": 2.926808921785249e-05, + "loss": 0.083648681640625, + "step": 1939 + }, + { + "epoch": 0.13113424361227524, + "grad_norm": 1.9844344854354858, + "learning_rate": 2.926707078049138e-05, + "loss": 0.22821044921875, + "step": 1940 + }, + { + "epoch": 0.1312018385832094, + "grad_norm": 1.1403032541275024, + "learning_rate": 2.9266051652800666e-05, + "loss": 0.15389251708984375, + "step": 1941 + }, + { + "epoch": 0.13126943355414358, + "grad_norm": 0.5897345542907715, + "learning_rate": 2.9265031834829645e-05, + "loss": 0.07666778564453125, + "step": 1942 + }, + { + "epoch": 0.13133702852507773, + "grad_norm": 1.6194900274276733, + "learning_rate": 2.9264011326627675e-05, + "loss": 0.13784027099609375, + "step": 1943 + }, + { + "epoch": 0.1314046234960119, + "grad_norm": 2.026538848876953, + "learning_rate": 2.926299012824413e-05, + "loss": 0.25860595703125, + "step": 1944 + }, + { + "epoch": 0.13147221846694607, + "grad_norm": 1.4891022443771362, + "learning_rate": 2.9261968239728418e-05, + "loss": 0.2199554443359375, + "step": 1945 + }, + { + "epoch": 0.1315398134378802, + "grad_norm": 1.1465108394622803, + "learning_rate": 2.926094566112999e-05, + "loss": 0.13825225830078125, + "step": 1946 + }, + { + "epoch": 0.13160740840881438, + "grad_norm": 0.46427205204963684, + "learning_rate": 2.9259922392498328e-05, + "loss": 0.08826446533203125, + "step": 1947 + }, + { + "epoch": 0.13167500337974855, + "grad_norm": 0.6790876388549805, + "learning_rate": 2.9258898433882934e-05, + "loss": 0.09393310546875, + "step": 1948 + }, + { + "epoch": 0.13174259835068272, + "grad_norm": 0.8105587363243103, + "learning_rate": 2.9257873785333362e-05, + "loss": 0.07908248901367188, + "step": 1949 + }, + { + "epoch": 0.13181019332161686, + "grad_norm": 1.536381483078003, + "learning_rate": 2.9256848446899186e-05, + "loss": 0.1911468505859375, + "step": 1950 + }, + { + "epoch": 0.13187778829255103, + "grad_norm": 0.6054447293281555, + "learning_rate": 2.9255822418630017e-05, + "loss": 0.12146759033203125, + "step": 1951 + }, + { + "epoch": 0.1319453832634852, + "grad_norm": 1.4052762985229492, + "learning_rate": 2.9254795700575505e-05, + "loss": 0.20831298828125, + "step": 1952 + }, + { + "epoch": 0.13201297823441935, + "grad_norm": 1.471704125404358, + "learning_rate": 2.9253768292785327e-05, + "loss": 0.19818115234375, + "step": 1953 + }, + { + "epoch": 0.13208057320535352, + "grad_norm": 0.8539409041404724, + "learning_rate": 2.92527401953092e-05, + "loss": 0.13257598876953125, + "step": 1954 + }, + { + "epoch": 0.1321481681762877, + "grad_norm": 1.0176379680633545, + "learning_rate": 2.9251711408196863e-05, + "loss": 0.093780517578125, + "step": 1955 + }, + { + "epoch": 0.13221576314722186, + "grad_norm": 2.001781702041626, + "learning_rate": 2.9250681931498096e-05, + "loss": 0.23565673828125, + "step": 1956 + }, + { + "epoch": 0.132283358118156, + "grad_norm": 1.1804487705230713, + "learning_rate": 2.9249651765262716e-05, + "loss": 0.28564453125, + "step": 1957 + }, + { + "epoch": 0.13235095308909017, + "grad_norm": 0.9044046401977539, + "learning_rate": 2.9248620909540565e-05, + "loss": 0.1414203643798828, + "step": 1958 + }, + { + "epoch": 0.13241854806002434, + "grad_norm": 0.7705449461936951, + "learning_rate": 2.9247589364381523e-05, + "loss": 0.13141632080078125, + "step": 1959 + }, + { + "epoch": 0.1324861430309585, + "grad_norm": 1.9240926504135132, + "learning_rate": 2.92465571298355e-05, + "loss": 0.22137451171875, + "step": 1960 + }, + { + "epoch": 0.13255373800189266, + "grad_norm": 0.22516031563282013, + "learning_rate": 2.9245524205952445e-05, + "loss": 0.01923370361328125, + "step": 1961 + }, + { + "epoch": 0.13262133297282683, + "grad_norm": 0.28851351141929626, + "learning_rate": 2.9244490592782344e-05, + "loss": 0.06169891357421875, + "step": 1962 + }, + { + "epoch": 0.13268892794376097, + "grad_norm": 0.9269805550575256, + "learning_rate": 2.9243456290375195e-05, + "loss": 0.1590576171875, + "step": 1963 + }, + { + "epoch": 0.13275652291469514, + "grad_norm": 1.6654274463653564, + "learning_rate": 2.9242421298781054e-05, + "loss": 0.12299346923828125, + "step": 1964 + }, + { + "epoch": 0.1328241178856293, + "grad_norm": 0.5239940285682678, + "learning_rate": 2.9241385618049997e-05, + "loss": 0.07804107666015625, + "step": 1965 + }, + { + "epoch": 0.13289171285656348, + "grad_norm": 0.7097540497779846, + "learning_rate": 2.924034924823214e-05, + "loss": 0.092803955078125, + "step": 1966 + }, + { + "epoch": 0.13295930782749762, + "grad_norm": 0.830284833908081, + "learning_rate": 2.923931218937762e-05, + "loss": 0.1024627685546875, + "step": 1967 + }, + { + "epoch": 0.1330269027984318, + "grad_norm": 0.9817687273025513, + "learning_rate": 2.9238274441536632e-05, + "loss": 0.1923828125, + "step": 1968 + }, + { + "epoch": 0.13309449776936597, + "grad_norm": 0.6264345645904541, + "learning_rate": 2.9237236004759373e-05, + "loss": 0.10050201416015625, + "step": 1969 + }, + { + "epoch": 0.1331620927403001, + "grad_norm": 1.1581237316131592, + "learning_rate": 2.9236196879096098e-05, + "loss": 0.18438720703125, + "step": 1970 + }, + { + "epoch": 0.13322968771123428, + "grad_norm": 0.8965451717376709, + "learning_rate": 2.9235157064597087e-05, + "loss": 0.206817626953125, + "step": 1971 + }, + { + "epoch": 0.13329728268216845, + "grad_norm": 0.8310124278068542, + "learning_rate": 2.9234116561312646e-05, + "loss": 0.1037750244140625, + "step": 1972 + }, + { + "epoch": 0.13336487765310262, + "grad_norm": 6.548305988311768, + "learning_rate": 2.9233075369293126e-05, + "loss": 0.3248291015625, + "step": 1973 + }, + { + "epoch": 0.13343247262403676, + "grad_norm": 0.986916184425354, + "learning_rate": 2.9232033488588903e-05, + "loss": 0.0803375244140625, + "step": 1974 + }, + { + "epoch": 0.13350006759497093, + "grad_norm": 0.6884934902191162, + "learning_rate": 2.9230990919250395e-05, + "loss": 0.098541259765625, + "step": 1975 + }, + { + "epoch": 0.1335676625659051, + "grad_norm": 1.2285668849945068, + "learning_rate": 2.9229947661328043e-05, + "loss": 0.256622314453125, + "step": 1976 + }, + { + "epoch": 0.13363525753683925, + "grad_norm": 1.660321593284607, + "learning_rate": 2.9228903714872327e-05, + "loss": 0.19024658203125, + "step": 1977 + }, + { + "epoch": 0.13370285250777342, + "grad_norm": 1.2558228969573975, + "learning_rate": 2.9227859079933764e-05, + "loss": 0.1802215576171875, + "step": 1978 + }, + { + "epoch": 0.1337704474787076, + "grad_norm": 2.4795374870300293, + "learning_rate": 2.9226813756562895e-05, + "loss": 0.2099761962890625, + "step": 1979 + }, + { + "epoch": 0.13383804244964176, + "grad_norm": 2.622133731842041, + "learning_rate": 2.9225767744810298e-05, + "loss": 0.218719482421875, + "step": 1980 + }, + { + "epoch": 0.1339056374205759, + "grad_norm": 3.861537218093872, + "learning_rate": 2.922472104472659e-05, + "loss": 0.26385498046875, + "step": 1981 + }, + { + "epoch": 0.13397323239151007, + "grad_norm": 0.6576160192489624, + "learning_rate": 2.922367365636242e-05, + "loss": 0.0744171142578125, + "step": 1982 + }, + { + "epoch": 0.13404082736244424, + "grad_norm": 1.4412453174591064, + "learning_rate": 2.9222625579768457e-05, + "loss": 0.158172607421875, + "step": 1983 + }, + { + "epoch": 0.13410842233337839, + "grad_norm": 0.47853368520736694, + "learning_rate": 2.922157681499542e-05, + "loss": 0.07720947265625, + "step": 1984 + }, + { + "epoch": 0.13417601730431256, + "grad_norm": 0.7073861360549927, + "learning_rate": 2.9220527362094055e-05, + "loss": 0.08056640625, + "step": 1985 + }, + { + "epoch": 0.13424361227524673, + "grad_norm": 0.9242136478424072, + "learning_rate": 2.921947722111514e-05, + "loss": 0.11003875732421875, + "step": 1986 + }, + { + "epoch": 0.1343112072461809, + "grad_norm": 1.3157637119293213, + "learning_rate": 2.9218426392109484e-05, + "loss": 0.15688323974609375, + "step": 1987 + }, + { + "epoch": 0.13437880221711504, + "grad_norm": 0.7614936232566833, + "learning_rate": 2.921737487512794e-05, + "loss": 0.125732421875, + "step": 1988 + }, + { + "epoch": 0.1344463971880492, + "grad_norm": 1.07798171043396, + "learning_rate": 2.9216322670221376e-05, + "loss": 0.13037109375, + "step": 1989 + }, + { + "epoch": 0.13451399215898338, + "grad_norm": 1.193820595741272, + "learning_rate": 2.9215269777440717e-05, + "loss": 0.17311859130859375, + "step": 1990 + }, + { + "epoch": 0.13458158712991752, + "grad_norm": 1.7627038955688477, + "learning_rate": 2.92142161968369e-05, + "loss": 0.1826171875, + "step": 1991 + }, + { + "epoch": 0.1346491821008517, + "grad_norm": 0.6325958967208862, + "learning_rate": 2.9213161928460908e-05, + "loss": 0.11382293701171875, + "step": 1992 + }, + { + "epoch": 0.13471677707178586, + "grad_norm": 1.5227633714675903, + "learning_rate": 2.9212106972363748e-05, + "loss": 0.1640167236328125, + "step": 1993 + }, + { + "epoch": 0.13478437204272004, + "grad_norm": 0.6863438487052917, + "learning_rate": 2.9211051328596472e-05, + "loss": 0.08319091796875, + "step": 1994 + }, + { + "epoch": 0.13485196701365418, + "grad_norm": 0.5232049226760864, + "learning_rate": 2.920999499721015e-05, + "loss": 0.06700897216796875, + "step": 1995 + }, + { + "epoch": 0.13491956198458835, + "grad_norm": 0.2611311376094818, + "learning_rate": 2.9208937978255907e-05, + "loss": 0.05069732666015625, + "step": 1996 + }, + { + "epoch": 0.13498715695552252, + "grad_norm": 1.083235263824463, + "learning_rate": 2.9207880271784874e-05, + "loss": 0.268341064453125, + "step": 1997 + }, + { + "epoch": 0.13505475192645666, + "grad_norm": 1.130401611328125, + "learning_rate": 2.9206821877848237e-05, + "loss": 0.13887405395507812, + "step": 1998 + }, + { + "epoch": 0.13512234689739083, + "grad_norm": 0.3885684609413147, + "learning_rate": 2.920576279649721e-05, + "loss": 0.06792449951171875, + "step": 1999 + }, + { + "epoch": 0.135189941868325, + "grad_norm": 0.7020634412765503, + "learning_rate": 2.9204703027783026e-05, + "loss": 0.16497802734375, + "step": 2000 + }, + { + "epoch": 0.13525753683925915, + "grad_norm": 0.30591386556625366, + "learning_rate": 2.9203642571756978e-05, + "loss": 0.049560546875, + "step": 2001 + }, + { + "epoch": 0.13532513181019332, + "grad_norm": 0.7825917601585388, + "learning_rate": 2.9202581428470368e-05, + "loss": 0.08403778076171875, + "step": 2002 + }, + { + "epoch": 0.1353927267811275, + "grad_norm": 0.3304368853569031, + "learning_rate": 2.9201519597974544e-05, + "loss": 0.058231353759765625, + "step": 2003 + }, + { + "epoch": 0.13546032175206166, + "grad_norm": 0.3002365231513977, + "learning_rate": 2.9200457080320884e-05, + "loss": 0.03884124755859375, + "step": 2004 + }, + { + "epoch": 0.1355279167229958, + "grad_norm": 0.21587596833705902, + "learning_rate": 2.9199393875560798e-05, + "loss": 0.03197479248046875, + "step": 2005 + }, + { + "epoch": 0.13559551169392997, + "grad_norm": 0.28652775287628174, + "learning_rate": 2.9198329983745733e-05, + "loss": 0.06220245361328125, + "step": 2006 + }, + { + "epoch": 0.13566310666486414, + "grad_norm": 0.46790650486946106, + "learning_rate": 2.9197265404927163e-05, + "loss": 0.082763671875, + "step": 2007 + }, + { + "epoch": 0.13573070163579828, + "grad_norm": 1.0145580768585205, + "learning_rate": 2.9196200139156604e-05, + "loss": 0.18206787109375, + "step": 2008 + }, + { + "epoch": 0.13579829660673245, + "grad_norm": 0.2191110998392105, + "learning_rate": 2.9195134186485593e-05, + "loss": 0.035350799560546875, + "step": 2009 + }, + { + "epoch": 0.13586589157766663, + "grad_norm": 1.1111626625061035, + "learning_rate": 2.9194067546965717e-05, + "loss": 0.18115234375, + "step": 2010 + }, + { + "epoch": 0.1359334865486008, + "grad_norm": 0.6904028058052063, + "learning_rate": 2.9193000220648576e-05, + "loss": 0.06830596923828125, + "step": 2011 + }, + { + "epoch": 0.13600108151953494, + "grad_norm": 0.9269860982894897, + "learning_rate": 2.9191932207585822e-05, + "loss": 0.08351325988769531, + "step": 2012 + }, + { + "epoch": 0.1360686764904691, + "grad_norm": 0.44838637113571167, + "learning_rate": 2.9190863507829124e-05, + "loss": 0.0804901123046875, + "step": 2013 + }, + { + "epoch": 0.13613627146140328, + "grad_norm": 1.1259825229644775, + "learning_rate": 2.9189794121430206e-05, + "loss": 0.192352294921875, + "step": 2014 + }, + { + "epoch": 0.13620386643233742, + "grad_norm": 1.3872705698013306, + "learning_rate": 2.9188724048440794e-05, + "loss": 0.14960098266601562, + "step": 2015 + }, + { + "epoch": 0.1362714614032716, + "grad_norm": 2.4840095043182373, + "learning_rate": 2.9187653288912676e-05, + "loss": 0.26739501953125, + "step": 2016 + }, + { + "epoch": 0.13633905637420576, + "grad_norm": 0.46063005924224854, + "learning_rate": 2.9186581842897665e-05, + "loss": 0.0616302490234375, + "step": 2017 + }, + { + "epoch": 0.13640665134513993, + "grad_norm": 0.5073718428611755, + "learning_rate": 2.9185509710447595e-05, + "loss": 0.06046295166015625, + "step": 2018 + }, + { + "epoch": 0.13647424631607408, + "grad_norm": 1.4777488708496094, + "learning_rate": 2.918443689161435e-05, + "loss": 0.16951751708984375, + "step": 2019 + }, + { + "epoch": 0.13654184128700825, + "grad_norm": 1.0379884243011475, + "learning_rate": 2.9183363386449832e-05, + "loss": 0.1503753662109375, + "step": 2020 + }, + { + "epoch": 0.13660943625794242, + "grad_norm": 0.44626322388648987, + "learning_rate": 2.9182289195005985e-05, + "loss": 0.07602310180664062, + "step": 2021 + }, + { + "epoch": 0.13667703122887656, + "grad_norm": 0.9060035943984985, + "learning_rate": 2.9181214317334798e-05, + "loss": 0.153533935546875, + "step": 2022 + }, + { + "epoch": 0.13674462619981073, + "grad_norm": 0.7522712349891663, + "learning_rate": 2.9180138753488264e-05, + "loss": 0.126617431640625, + "step": 2023 + }, + { + "epoch": 0.1368122211707449, + "grad_norm": 0.900567352771759, + "learning_rate": 2.9179062503518433e-05, + "loss": 0.1591033935546875, + "step": 2024 + }, + { + "epoch": 0.13687981614167907, + "grad_norm": 0.4611891508102417, + "learning_rate": 2.9177985567477377e-05, + "loss": 0.09873580932617188, + "step": 2025 + }, + { + "epoch": 0.13694741111261322, + "grad_norm": 0.974586009979248, + "learning_rate": 2.9176907945417207e-05, + "loss": 0.1781005859375, + "step": 2026 + }, + { + "epoch": 0.13701500608354739, + "grad_norm": 1.2410551309585571, + "learning_rate": 2.917582963739007e-05, + "loss": 0.1618804931640625, + "step": 2027 + }, + { + "epoch": 0.13708260105448156, + "grad_norm": 1.1067450046539307, + "learning_rate": 2.9174750643448135e-05, + "loss": 0.12286376953125, + "step": 2028 + }, + { + "epoch": 0.1371501960254157, + "grad_norm": 1.5324722528457642, + "learning_rate": 2.917367096364361e-05, + "loss": 0.138885498046875, + "step": 2029 + }, + { + "epoch": 0.13721779099634987, + "grad_norm": 0.6823291778564453, + "learning_rate": 2.9172590598028742e-05, + "loss": 0.08837890625, + "step": 2030 + }, + { + "epoch": 0.13728538596728404, + "grad_norm": 0.6587632298469543, + "learning_rate": 2.9171509546655797e-05, + "loss": 0.129486083984375, + "step": 2031 + }, + { + "epoch": 0.13735298093821818, + "grad_norm": 0.47771209478378296, + "learning_rate": 2.917042780957709e-05, + "loss": 0.07809066772460938, + "step": 2032 + }, + { + "epoch": 0.13742057590915235, + "grad_norm": 0.5574473738670349, + "learning_rate": 2.9169345386844963e-05, + "loss": 0.0926666259765625, + "step": 2033 + }, + { + "epoch": 0.13748817088008652, + "grad_norm": 0.6777884364128113, + "learning_rate": 2.9168262278511788e-05, + "loss": 0.1277008056640625, + "step": 2034 + }, + { + "epoch": 0.1375557658510207, + "grad_norm": 0.36960920691490173, + "learning_rate": 2.9167178484629976e-05, + "loss": 0.09772491455078125, + "step": 2035 + }, + { + "epoch": 0.13762336082195484, + "grad_norm": 0.6538333296775818, + "learning_rate": 2.916609400525196e-05, + "loss": 0.14329910278320312, + "step": 2036 + }, + { + "epoch": 0.137690955792889, + "grad_norm": 0.7805355787277222, + "learning_rate": 2.9165008840430215e-05, + "loss": 0.1714324951171875, + "step": 2037 + }, + { + "epoch": 0.13775855076382318, + "grad_norm": 0.7335939407348633, + "learning_rate": 2.9163922990217254e-05, + "loss": 0.162109375, + "step": 2038 + }, + { + "epoch": 0.13782614573475732, + "grad_norm": 0.4736831784248352, + "learning_rate": 2.916283645466562e-05, + "loss": 0.08933830261230469, + "step": 2039 + }, + { + "epoch": 0.1378937407056915, + "grad_norm": 0.573756217956543, + "learning_rate": 2.9161749233827873e-05, + "loss": 0.1156768798828125, + "step": 2040 + }, + { + "epoch": 0.13796133567662566, + "grad_norm": 0.6633263230323792, + "learning_rate": 2.9160661327756628e-05, + "loss": 0.1375732421875, + "step": 2041 + }, + { + "epoch": 0.13802893064755983, + "grad_norm": 1.0797723531723022, + "learning_rate": 2.9159572736504524e-05, + "loss": 0.10872459411621094, + "step": 2042 + }, + { + "epoch": 0.13809652561849398, + "grad_norm": 1.78667414188385, + "learning_rate": 2.9158483460124234e-05, + "loss": 0.219512939453125, + "step": 2043 + }, + { + "epoch": 0.13816412058942815, + "grad_norm": 1.8739458322525024, + "learning_rate": 2.9157393498668463e-05, + "loss": 0.17229461669921875, + "step": 2044 + }, + { + "epoch": 0.13823171556036232, + "grad_norm": 1.4678007364273071, + "learning_rate": 2.9156302852189954e-05, + "loss": 0.13797760009765625, + "step": 2045 + }, + { + "epoch": 0.13829931053129646, + "grad_norm": 1.3417381048202515, + "learning_rate": 2.9155211520741472e-05, + "loss": 0.192901611328125, + "step": 2046 + }, + { + "epoch": 0.13836690550223063, + "grad_norm": 0.2318125218153, + "learning_rate": 2.915411950437583e-05, + "loss": 0.0475921630859375, + "step": 2047 + }, + { + "epoch": 0.1384345004731648, + "grad_norm": 0.6172881722450256, + "learning_rate": 2.9153026803145858e-05, + "loss": 0.0985870361328125, + "step": 2048 + }, + { + "epoch": 0.13850209544409897, + "grad_norm": 0.7514657378196716, + "learning_rate": 2.9151933417104436e-05, + "loss": 0.1463623046875, + "step": 2049 + }, + { + "epoch": 0.13856969041503311, + "grad_norm": 0.5986025929450989, + "learning_rate": 2.9150839346304466e-05, + "loss": 0.12142181396484375, + "step": 2050 + }, + { + "epoch": 0.13863728538596728, + "grad_norm": 0.6963147521018982, + "learning_rate": 2.914974459079888e-05, + "loss": 0.07607269287109375, + "step": 2051 + }, + { + "epoch": 0.13870488035690146, + "grad_norm": 1.2156615257263184, + "learning_rate": 2.914864915064066e-05, + "loss": 0.0919342041015625, + "step": 2052 + }, + { + "epoch": 0.1387724753278356, + "grad_norm": 0.7968429923057556, + "learning_rate": 2.9147553025882803e-05, + "loss": 0.058135986328125, + "step": 2053 + }, + { + "epoch": 0.13884007029876977, + "grad_norm": 0.7171319723129272, + "learning_rate": 2.9146456216578348e-05, + "loss": 0.089508056640625, + "step": 2054 + }, + { + "epoch": 0.13890766526970394, + "grad_norm": 0.4492036998271942, + "learning_rate": 2.9145358722780365e-05, + "loss": 0.07157135009765625, + "step": 2055 + }, + { + "epoch": 0.1389752602406381, + "grad_norm": 0.47892189025878906, + "learning_rate": 2.9144260544541956e-05, + "loss": 0.08624649047851562, + "step": 2056 + }, + { + "epoch": 0.13904285521157225, + "grad_norm": 0.5117868781089783, + "learning_rate": 2.9143161681916264e-05, + "loss": 0.123504638671875, + "step": 2057 + }, + { + "epoch": 0.13911045018250642, + "grad_norm": 0.9341139793395996, + "learning_rate": 2.914206213495645e-05, + "loss": 0.1290740966796875, + "step": 2058 + }, + { + "epoch": 0.1391780451534406, + "grad_norm": 1.0499433279037476, + "learning_rate": 2.9140961903715726e-05, + "loss": 0.0681610107421875, + "step": 2059 + }, + { + "epoch": 0.13924564012437474, + "grad_norm": 2.7713444232940674, + "learning_rate": 2.913986098824732e-05, + "loss": 0.1917572021484375, + "step": 2060 + }, + { + "epoch": 0.1393132350953089, + "grad_norm": 1.111262321472168, + "learning_rate": 2.9138759388604507e-05, + "loss": 0.1491851806640625, + "step": 2061 + }, + { + "epoch": 0.13938083006624308, + "grad_norm": 1.6179217100143433, + "learning_rate": 2.913765710484058e-05, + "loss": 0.1988525390625, + "step": 2062 + }, + { + "epoch": 0.13944842503717722, + "grad_norm": 0.45963653922080994, + "learning_rate": 2.913655413700889e-05, + "loss": 0.1152801513671875, + "step": 2063 + }, + { + "epoch": 0.1395160200081114, + "grad_norm": 0.9682022929191589, + "learning_rate": 2.9135450485162792e-05, + "loss": 0.1746673583984375, + "step": 2064 + }, + { + "epoch": 0.13958361497904556, + "grad_norm": 0.7036135196685791, + "learning_rate": 2.9134346149355696e-05, + "loss": 0.1251373291015625, + "step": 2065 + }, + { + "epoch": 0.13965120994997973, + "grad_norm": 2.7053260803222656, + "learning_rate": 2.9133241129641028e-05, + "loss": 0.2215576171875, + "step": 2066 + }, + { + "epoch": 0.13971880492091387, + "grad_norm": 1.8343498706817627, + "learning_rate": 2.913213542607226e-05, + "loss": 0.1407012939453125, + "step": 2067 + }, + { + "epoch": 0.13978639989184805, + "grad_norm": 0.8128616213798523, + "learning_rate": 2.91310290387029e-05, + "loss": 0.07289314270019531, + "step": 2068 + }, + { + "epoch": 0.13985399486278222, + "grad_norm": 1.949761986732483, + "learning_rate": 2.9129921967586465e-05, + "loss": 0.17639923095703125, + "step": 2069 + }, + { + "epoch": 0.13992158983371636, + "grad_norm": 1.8978784084320068, + "learning_rate": 2.912881421277654e-05, + "loss": 0.22084808349609375, + "step": 2070 + }, + { + "epoch": 0.13998918480465053, + "grad_norm": 0.5831576585769653, + "learning_rate": 2.9127705774326715e-05, + "loss": 0.1211090087890625, + "step": 2071 + }, + { + "epoch": 0.1400567797755847, + "grad_norm": 1.2330940961837769, + "learning_rate": 2.9126596652290626e-05, + "loss": 0.16042327880859375, + "step": 2072 + }, + { + "epoch": 0.14012437474651887, + "grad_norm": 0.9134007692337036, + "learning_rate": 2.912548684672194e-05, + "loss": 0.171539306640625, + "step": 2073 + }, + { + "epoch": 0.140191969717453, + "grad_norm": 0.6450865268707275, + "learning_rate": 2.912437635767435e-05, + "loss": 0.09820556640625, + "step": 2074 + }, + { + "epoch": 0.14025956468838718, + "grad_norm": 0.6279158592224121, + "learning_rate": 2.9123265185201596e-05, + "loss": 0.0569610595703125, + "step": 2075 + }, + { + "epoch": 0.14032715965932135, + "grad_norm": 0.39188289642333984, + "learning_rate": 2.912215332935744e-05, + "loss": 0.04929351806640625, + "step": 2076 + }, + { + "epoch": 0.1403947546302555, + "grad_norm": 0.8405542373657227, + "learning_rate": 2.9121040790195685e-05, + "loss": 0.1702728271484375, + "step": 2077 + }, + { + "epoch": 0.14046234960118967, + "grad_norm": 1.2621711492538452, + "learning_rate": 2.9119927567770158e-05, + "loss": 0.18700408935546875, + "step": 2078 + }, + { + "epoch": 0.14052994457212384, + "grad_norm": 0.46625199913978577, + "learning_rate": 2.911881366213472e-05, + "loss": 0.0832061767578125, + "step": 2079 + }, + { + "epoch": 0.140597539543058, + "grad_norm": 0.4675680994987488, + "learning_rate": 2.9117699073343273e-05, + "loss": 0.12281036376953125, + "step": 2080 + }, + { + "epoch": 0.14066513451399215, + "grad_norm": 1.0241519212722778, + "learning_rate": 2.9116583801449756e-05, + "loss": 0.13657379150390625, + "step": 2081 + }, + { + "epoch": 0.14073272948492632, + "grad_norm": 0.5086511373519897, + "learning_rate": 2.911546784650811e-05, + "loss": 0.1157989501953125, + "step": 2082 + }, + { + "epoch": 0.1408003244558605, + "grad_norm": 1.1146982908248901, + "learning_rate": 2.911435120857236e-05, + "loss": 0.225494384765625, + "step": 2083 + }, + { + "epoch": 0.14086791942679464, + "grad_norm": 0.5734338164329529, + "learning_rate": 2.9113233887696517e-05, + "loss": 0.06123542785644531, + "step": 2084 + }, + { + "epoch": 0.1409355143977288, + "grad_norm": 1.4839040040969849, + "learning_rate": 2.911211588393465e-05, + "loss": 0.15419769287109375, + "step": 2085 + }, + { + "epoch": 0.14100310936866298, + "grad_norm": 0.4721846878528595, + "learning_rate": 2.9110997197340853e-05, + "loss": 0.06073951721191406, + "step": 2086 + }, + { + "epoch": 0.14107070433959715, + "grad_norm": 0.7829835414886475, + "learning_rate": 2.910987782796926e-05, + "loss": 0.15309524536132812, + "step": 2087 + }, + { + "epoch": 0.1411382993105313, + "grad_norm": 1.1941735744476318, + "learning_rate": 2.9108757775874025e-05, + "loss": 0.1713714599609375, + "step": 2088 + }, + { + "epoch": 0.14120589428146546, + "grad_norm": 0.7749476432800293, + "learning_rate": 2.910763704110935e-05, + "loss": 0.09708213806152344, + "step": 2089 + }, + { + "epoch": 0.14127348925239963, + "grad_norm": 0.3011016249656677, + "learning_rate": 2.910651562372946e-05, + "loss": 0.0590362548828125, + "step": 2090 + }, + { + "epoch": 0.14134108422333377, + "grad_norm": 1.3652021884918213, + "learning_rate": 2.9105393523788615e-05, + "loss": 0.165374755859375, + "step": 2091 + }, + { + "epoch": 0.14140867919426794, + "grad_norm": 0.4537860155105591, + "learning_rate": 2.910427074134111e-05, + "loss": 0.0828399658203125, + "step": 2092 + }, + { + "epoch": 0.14147627416520211, + "grad_norm": 0.2692244052886963, + "learning_rate": 2.9103147276441275e-05, + "loss": 0.046600341796875, + "step": 2093 + }, + { + "epoch": 0.14154386913613626, + "grad_norm": 0.2770090103149414, + "learning_rate": 2.9102023129143468e-05, + "loss": 0.033725738525390625, + "step": 2094 + }, + { + "epoch": 0.14161146410707043, + "grad_norm": 0.7129844427108765, + "learning_rate": 2.9100898299502084e-05, + "loss": 0.05429840087890625, + "step": 2095 + }, + { + "epoch": 0.1416790590780046, + "grad_norm": 0.9762256741523743, + "learning_rate": 2.9099772787571545e-05, + "loss": 0.0745697021484375, + "step": 2096 + }, + { + "epoch": 0.14174665404893877, + "grad_norm": 1.2867155075073242, + "learning_rate": 2.9098646593406314e-05, + "loss": 0.11930084228515625, + "step": 2097 + }, + { + "epoch": 0.1418142490198729, + "grad_norm": 1.9708129167556763, + "learning_rate": 2.909751971706088e-05, + "loss": 0.186798095703125, + "step": 2098 + }, + { + "epoch": 0.14188184399080708, + "grad_norm": 0.4109620451927185, + "learning_rate": 2.9096392158589775e-05, + "loss": 0.0735015869140625, + "step": 2099 + }, + { + "epoch": 0.14194943896174125, + "grad_norm": 0.28395578265190125, + "learning_rate": 2.909526391804755e-05, + "loss": 0.0302276611328125, + "step": 2100 + }, + { + "epoch": 0.1420170339326754, + "grad_norm": 0.8187513947486877, + "learning_rate": 2.9094134995488803e-05, + "loss": 0.07415580749511719, + "step": 2101 + }, + { + "epoch": 0.14208462890360957, + "grad_norm": 1.291177749633789, + "learning_rate": 2.9093005390968153e-05, + "loss": 0.1097869873046875, + "step": 2102 + }, + { + "epoch": 0.14215222387454374, + "grad_norm": 0.5652191638946533, + "learning_rate": 2.9091875104540254e-05, + "loss": 0.059967041015625, + "step": 2103 + }, + { + "epoch": 0.1422198188454779, + "grad_norm": 1.0532677173614502, + "learning_rate": 2.9090744136259804e-05, + "loss": 0.24981689453125, + "step": 2104 + }, + { + "epoch": 0.14228741381641205, + "grad_norm": 0.8905255198478699, + "learning_rate": 2.9089612486181526e-05, + "loss": 0.234283447265625, + "step": 2105 + }, + { + "epoch": 0.14235500878734622, + "grad_norm": 0.6076107025146484, + "learning_rate": 2.908848015436017e-05, + "loss": 0.137115478515625, + "step": 2106 + }, + { + "epoch": 0.1424226037582804, + "grad_norm": 0.7371249794960022, + "learning_rate": 2.908734714085053e-05, + "loss": 0.11932373046875, + "step": 2107 + }, + { + "epoch": 0.14249019872921453, + "grad_norm": 1.6657675504684448, + "learning_rate": 2.9086213445707427e-05, + "loss": 0.299652099609375, + "step": 2108 + }, + { + "epoch": 0.1425577937001487, + "grad_norm": 0.4370476007461548, + "learning_rate": 2.9085079068985714e-05, + "loss": 0.07898712158203125, + "step": 2109 + }, + { + "epoch": 0.14262538867108288, + "grad_norm": 0.953450083732605, + "learning_rate": 2.9083944010740288e-05, + "loss": 0.2274169921875, + "step": 2110 + }, + { + "epoch": 0.14269298364201705, + "grad_norm": 0.5834885239601135, + "learning_rate": 2.9082808271026058e-05, + "loss": 0.139129638671875, + "step": 2111 + }, + { + "epoch": 0.1427605786129512, + "grad_norm": 0.3393992483615875, + "learning_rate": 2.9081671849897982e-05, + "loss": 0.05857086181640625, + "step": 2112 + }, + { + "epoch": 0.14282817358388536, + "grad_norm": 1.6673280000686646, + "learning_rate": 2.9080534747411055e-05, + "loss": 0.14067840576171875, + "step": 2113 + }, + { + "epoch": 0.14289576855481953, + "grad_norm": 0.2531488537788391, + "learning_rate": 2.9079396963620287e-05, + "loss": 0.0352630615234375, + "step": 2114 + }, + { + "epoch": 0.14296336352575367, + "grad_norm": 0.7340406179428101, + "learning_rate": 2.9078258498580738e-05, + "loss": 0.1787567138671875, + "step": 2115 + }, + { + "epoch": 0.14303095849668784, + "grad_norm": 0.31225135922431946, + "learning_rate": 2.9077119352347487e-05, + "loss": 0.09284210205078125, + "step": 2116 + }, + { + "epoch": 0.14309855346762201, + "grad_norm": 0.9937880039215088, + "learning_rate": 2.907597952497566e-05, + "loss": 0.1383819580078125, + "step": 2117 + }, + { + "epoch": 0.14316614843855618, + "grad_norm": 0.27180904150009155, + "learning_rate": 2.9074839016520407e-05, + "loss": 0.042423248291015625, + "step": 2118 + }, + { + "epoch": 0.14323374340949033, + "grad_norm": 1.3739303350448608, + "learning_rate": 2.907369782703691e-05, + "loss": 0.1637725830078125, + "step": 2119 + }, + { + "epoch": 0.1433013383804245, + "grad_norm": 0.6422386765480042, + "learning_rate": 2.907255595658039e-05, + "loss": 0.1110687255859375, + "step": 2120 + }, + { + "epoch": 0.14336893335135867, + "grad_norm": 0.4079066812992096, + "learning_rate": 2.907141340520609e-05, + "loss": 0.050083160400390625, + "step": 2121 + }, + { + "epoch": 0.1434365283222928, + "grad_norm": 0.26352640986442566, + "learning_rate": 2.9070270172969308e-05, + "loss": 0.04003143310546875, + "step": 2122 + }, + { + "epoch": 0.14350412329322698, + "grad_norm": 0.3828147053718567, + "learning_rate": 2.9069126259925352e-05, + "loss": 0.08379745483398438, + "step": 2123 + }, + { + "epoch": 0.14357171826416115, + "grad_norm": 0.35099658370018005, + "learning_rate": 2.9067981666129568e-05, + "loss": 0.07514190673828125, + "step": 2124 + }, + { + "epoch": 0.14363931323509532, + "grad_norm": 0.8519493937492371, + "learning_rate": 2.9066836391637348e-05, + "loss": 0.1708984375, + "step": 2125 + }, + { + "epoch": 0.14370690820602947, + "grad_norm": 0.7092198729515076, + "learning_rate": 2.9065690436504102e-05, + "loss": 0.20330810546875, + "step": 2126 + }, + { + "epoch": 0.14377450317696364, + "grad_norm": 0.5623978972434998, + "learning_rate": 2.906454380078528e-05, + "loss": 0.06900787353515625, + "step": 2127 + }, + { + "epoch": 0.1438420981478978, + "grad_norm": 0.5929051637649536, + "learning_rate": 2.906339648453636e-05, + "loss": 0.0970001220703125, + "step": 2128 + }, + { + "epoch": 0.14390969311883195, + "grad_norm": 1.455854058265686, + "learning_rate": 2.9062248487812856e-05, + "loss": 0.19830322265625, + "step": 2129 + }, + { + "epoch": 0.14397728808976612, + "grad_norm": 0.4706304371356964, + "learning_rate": 2.9061099810670327e-05, + "loss": 0.07297515869140625, + "step": 2130 + }, + { + "epoch": 0.1440448830607003, + "grad_norm": 0.879882276058197, + "learning_rate": 2.9059950453164337e-05, + "loss": 0.14152908325195312, + "step": 2131 + }, + { + "epoch": 0.14411247803163443, + "grad_norm": 0.7753787040710449, + "learning_rate": 2.9058800415350507e-05, + "loss": 0.080841064453125, + "step": 2132 + }, + { + "epoch": 0.1441800730025686, + "grad_norm": 1.160789132118225, + "learning_rate": 2.9057649697284483e-05, + "loss": 0.13018798828125, + "step": 2133 + }, + { + "epoch": 0.14424766797350277, + "grad_norm": 1.6715755462646484, + "learning_rate": 2.9056498299021943e-05, + "loss": 0.12237548828125, + "step": 2134 + }, + { + "epoch": 0.14431526294443695, + "grad_norm": 0.916366457939148, + "learning_rate": 2.90553462206186e-05, + "loss": 0.11525726318359375, + "step": 2135 + }, + { + "epoch": 0.1443828579153711, + "grad_norm": 0.4738450050354004, + "learning_rate": 2.9054193462130193e-05, + "loss": 0.06734085083007812, + "step": 2136 + }, + { + "epoch": 0.14445045288630526, + "grad_norm": 0.5116995573043823, + "learning_rate": 2.9053040023612508e-05, + "loss": 0.14295196533203125, + "step": 2137 + }, + { + "epoch": 0.14451804785723943, + "grad_norm": 0.4328247606754303, + "learning_rate": 2.9051885905121353e-05, + "loss": 0.0570831298828125, + "step": 2138 + }, + { + "epoch": 0.14458564282817357, + "grad_norm": 0.3438052833080292, + "learning_rate": 2.905073110671257e-05, + "loss": 0.055210113525390625, + "step": 2139 + }, + { + "epoch": 0.14465323779910774, + "grad_norm": 0.5660626292228699, + "learning_rate": 2.904957562844203e-05, + "loss": 0.104034423828125, + "step": 2140 + }, + { + "epoch": 0.1447208327700419, + "grad_norm": 0.4320739805698395, + "learning_rate": 2.9048419470365656e-05, + "loss": 0.0683441162109375, + "step": 2141 + }, + { + "epoch": 0.14478842774097608, + "grad_norm": 0.5631633400917053, + "learning_rate": 2.9047262632539375e-05, + "loss": 0.153533935546875, + "step": 2142 + }, + { + "epoch": 0.14485602271191023, + "grad_norm": 0.4887137711048126, + "learning_rate": 2.9046105115019173e-05, + "loss": 0.0664520263671875, + "step": 2143 + }, + { + "epoch": 0.1449236176828444, + "grad_norm": 0.8823772072792053, + "learning_rate": 2.904494691786105e-05, + "loss": 0.09612274169921875, + "step": 2144 + }, + { + "epoch": 0.14499121265377857, + "grad_norm": 0.7428975701332092, + "learning_rate": 2.9043788041121053e-05, + "loss": 0.15964508056640625, + "step": 2145 + }, + { + "epoch": 0.1450588076247127, + "grad_norm": 0.3313015103340149, + "learning_rate": 2.904262848485525e-05, + "loss": 0.029052734375, + "step": 2146 + }, + { + "epoch": 0.14512640259564688, + "grad_norm": 0.9903916120529175, + "learning_rate": 2.9041468249119758e-05, + "loss": 0.190155029296875, + "step": 2147 + }, + { + "epoch": 0.14519399756658105, + "grad_norm": 0.9068624377250671, + "learning_rate": 2.9040307333970705e-05, + "loss": 0.13126754760742188, + "step": 2148 + }, + { + "epoch": 0.14526159253751522, + "grad_norm": 0.706684410572052, + "learning_rate": 2.9039145739464267e-05, + "loss": 0.13872909545898438, + "step": 2149 + }, + { + "epoch": 0.14532918750844936, + "grad_norm": 0.5656771063804626, + "learning_rate": 2.903798346565665e-05, + "loss": 0.11498260498046875, + "step": 2150 + }, + { + "epoch": 0.14539678247938354, + "grad_norm": 0.8028843998908997, + "learning_rate": 2.903682051260409e-05, + "loss": 0.12881088256835938, + "step": 2151 + }, + { + "epoch": 0.1454643774503177, + "grad_norm": 0.3236325979232788, + "learning_rate": 2.903565688036286e-05, + "loss": 0.05040264129638672, + "step": 2152 + }, + { + "epoch": 0.14553197242125185, + "grad_norm": 0.5541857481002808, + "learning_rate": 2.9034492568989262e-05, + "loss": 0.06766128540039062, + "step": 2153 + }, + { + "epoch": 0.14559956739218602, + "grad_norm": 0.40409666299819946, + "learning_rate": 2.903332757853964e-05, + "loss": 0.08526229858398438, + "step": 2154 + }, + { + "epoch": 0.1456671623631202, + "grad_norm": 1.088966727256775, + "learning_rate": 2.903216190907035e-05, + "loss": 0.16146278381347656, + "step": 2155 + }, + { + "epoch": 0.14573475733405436, + "grad_norm": 0.5684337615966797, + "learning_rate": 2.903099556063781e-05, + "loss": 0.12957763671875, + "step": 2156 + }, + { + "epoch": 0.1458023523049885, + "grad_norm": 0.7337134480476379, + "learning_rate": 2.9029828533298443e-05, + "loss": 0.226043701171875, + "step": 2157 + }, + { + "epoch": 0.14586994727592267, + "grad_norm": 0.9698041677474976, + "learning_rate": 2.902866082710872e-05, + "loss": 0.222625732421875, + "step": 2158 + }, + { + "epoch": 0.14593754224685684, + "grad_norm": 0.9389615058898926, + "learning_rate": 2.902749244212514e-05, + "loss": 0.11362457275390625, + "step": 2159 + }, + { + "epoch": 0.146005137217791, + "grad_norm": 0.542285680770874, + "learning_rate": 2.9026323378404244e-05, + "loss": 0.104888916015625, + "step": 2160 + }, + { + "epoch": 0.14607273218872516, + "grad_norm": 0.41004592180252075, + "learning_rate": 2.9025153636002596e-05, + "loss": 0.07151031494140625, + "step": 2161 + }, + { + "epoch": 0.14614032715965933, + "grad_norm": 0.6849035024642944, + "learning_rate": 2.902398321497679e-05, + "loss": 0.1238555908203125, + "step": 2162 + }, + { + "epoch": 0.14620792213059347, + "grad_norm": 0.9318863153457642, + "learning_rate": 2.9022812115383464e-05, + "loss": 0.091278076171875, + "step": 2163 + }, + { + "epoch": 0.14627551710152764, + "grad_norm": 0.48501160740852356, + "learning_rate": 2.9021640337279276e-05, + "loss": 0.0387420654296875, + "step": 2164 + }, + { + "epoch": 0.1463431120724618, + "grad_norm": 0.7684586048126221, + "learning_rate": 2.9020467880720932e-05, + "loss": 0.139862060546875, + "step": 2165 + }, + { + "epoch": 0.14641070704339598, + "grad_norm": 0.5347738862037659, + "learning_rate": 2.901929474576516e-05, + "loss": 0.04661369323730469, + "step": 2166 + }, + { + "epoch": 0.14647830201433013, + "grad_norm": 1.7876672744750977, + "learning_rate": 2.9018120932468723e-05, + "loss": 0.15765380859375, + "step": 2167 + }, + { + "epoch": 0.1465458969852643, + "grad_norm": 0.6359830498695374, + "learning_rate": 2.9016946440888417e-05, + "loss": 0.04685211181640625, + "step": 2168 + }, + { + "epoch": 0.14661349195619847, + "grad_norm": 1.144135594367981, + "learning_rate": 2.9015771271081075e-05, + "loss": 0.13777923583984375, + "step": 2169 + }, + { + "epoch": 0.1466810869271326, + "grad_norm": 0.2804742157459259, + "learning_rate": 2.9014595423103553e-05, + "loss": 0.0316619873046875, + "step": 2170 + }, + { + "epoch": 0.14674868189806678, + "grad_norm": 2.066448211669922, + "learning_rate": 2.9013418897012748e-05, + "loss": 0.22027587890625, + "step": 2171 + }, + { + "epoch": 0.14681627686900095, + "grad_norm": 0.43017688393592834, + "learning_rate": 2.9012241692865587e-05, + "loss": 0.06529998779296875, + "step": 2172 + }, + { + "epoch": 0.14688387183993512, + "grad_norm": 0.6757776141166687, + "learning_rate": 2.9011063810719035e-05, + "loss": 0.14703369140625, + "step": 2173 + }, + { + "epoch": 0.14695146681086926, + "grad_norm": 0.8210265636444092, + "learning_rate": 2.900988525063008e-05, + "loss": 0.13397216796875, + "step": 2174 + }, + { + "epoch": 0.14701906178180343, + "grad_norm": 1.1121041774749756, + "learning_rate": 2.9008706012655752e-05, + "loss": 0.1329498291015625, + "step": 2175 + }, + { + "epoch": 0.1470866567527376, + "grad_norm": 0.7945305705070496, + "learning_rate": 2.9007526096853106e-05, + "loss": 0.097869873046875, + "step": 2176 + }, + { + "epoch": 0.14715425172367175, + "grad_norm": 0.41743627190589905, + "learning_rate": 2.9006345503279235e-05, + "loss": 0.0601348876953125, + "step": 2177 + }, + { + "epoch": 0.14722184669460592, + "grad_norm": 1.4590002298355103, + "learning_rate": 2.9005164231991265e-05, + "loss": 0.227508544921875, + "step": 2178 + }, + { + "epoch": 0.1472894416655401, + "grad_norm": 0.4295347332954407, + "learning_rate": 2.9003982283046352e-05, + "loss": 0.08221435546875, + "step": 2179 + }, + { + "epoch": 0.14735703663647426, + "grad_norm": 0.3090097904205322, + "learning_rate": 2.9002799656501685e-05, + "loss": 0.04416656494140625, + "step": 2180 + }, + { + "epoch": 0.1474246316074084, + "grad_norm": 0.9918429255485535, + "learning_rate": 2.900161635241449e-05, + "loss": 0.18878173828125, + "step": 2181 + }, + { + "epoch": 0.14749222657834257, + "grad_norm": 1.1703681945800781, + "learning_rate": 2.900043237084202e-05, + "loss": 0.16454315185546875, + "step": 2182 + }, + { + "epoch": 0.14755982154927674, + "grad_norm": 0.2910204231739044, + "learning_rate": 2.8999247711841566e-05, + "loss": 0.028894424438476562, + "step": 2183 + }, + { + "epoch": 0.14762741652021089, + "grad_norm": 0.8816078305244446, + "learning_rate": 2.8998062375470446e-05, + "loss": 0.216094970703125, + "step": 2184 + }, + { + "epoch": 0.14769501149114506, + "grad_norm": 0.8018770217895508, + "learning_rate": 2.8996876361786018e-05, + "loss": 0.1604156494140625, + "step": 2185 + }, + { + "epoch": 0.14776260646207923, + "grad_norm": 0.3586136996746063, + "learning_rate": 2.899568967084566e-05, + "loss": 0.05760955810546875, + "step": 2186 + }, + { + "epoch": 0.1478302014330134, + "grad_norm": 1.1247529983520508, + "learning_rate": 2.89945023027068e-05, + "loss": 0.1778411865234375, + "step": 2187 + }, + { + "epoch": 0.14789779640394754, + "grad_norm": 1.0784977674484253, + "learning_rate": 2.8993314257426893e-05, + "loss": 0.15964508056640625, + "step": 2188 + }, + { + "epoch": 0.1479653913748817, + "grad_norm": 0.997088611125946, + "learning_rate": 2.899212553506341e-05, + "loss": 0.2246551513671875, + "step": 2189 + }, + { + "epoch": 0.14803298634581588, + "grad_norm": 0.7287680506706238, + "learning_rate": 2.8990936135673884e-05, + "loss": 0.1634521484375, + "step": 2190 + }, + { + "epoch": 0.14810058131675002, + "grad_norm": 0.7165250182151794, + "learning_rate": 2.8989746059315858e-05, + "loss": 0.144622802734375, + "step": 2191 + }, + { + "epoch": 0.1481681762876842, + "grad_norm": 2.262223482131958, + "learning_rate": 2.898855530604691e-05, + "loss": 0.2181396484375, + "step": 2192 + }, + { + "epoch": 0.14823577125861837, + "grad_norm": 1.0037091970443726, + "learning_rate": 2.898736387592467e-05, + "loss": 0.18377685546875, + "step": 2193 + }, + { + "epoch": 0.1483033662295525, + "grad_norm": 1.0547841787338257, + "learning_rate": 2.8986171769006777e-05, + "loss": 0.137420654296875, + "step": 2194 + }, + { + "epoch": 0.14837096120048668, + "grad_norm": 0.5611889958381653, + "learning_rate": 2.8984978985350915e-05, + "loss": 0.07822799682617188, + "step": 2195 + }, + { + "epoch": 0.14843855617142085, + "grad_norm": 1.5628591775894165, + "learning_rate": 2.8983785525014796e-05, + "loss": 0.15836334228515625, + "step": 2196 + }, + { + "epoch": 0.14850615114235502, + "grad_norm": 0.36262574791908264, + "learning_rate": 2.8982591388056172e-05, + "loss": 0.060577392578125, + "step": 2197 + }, + { + "epoch": 0.14857374611328916, + "grad_norm": 0.49463820457458496, + "learning_rate": 2.8981396574532818e-05, + "loss": 0.0954132080078125, + "step": 2198 + }, + { + "epoch": 0.14864134108422333, + "grad_norm": 0.35489633679389954, + "learning_rate": 2.8980201084502548e-05, + "loss": 0.041900634765625, + "step": 2199 + }, + { + "epoch": 0.1487089360551575, + "grad_norm": 2.067915916442871, + "learning_rate": 2.8979004918023208e-05, + "loss": 0.1757659912109375, + "step": 2200 + }, + { + "epoch": 0.14877653102609165, + "grad_norm": 1.1431622505187988, + "learning_rate": 2.8977808075152677e-05, + "loss": 0.1986083984375, + "step": 2201 + }, + { + "epoch": 0.14884412599702582, + "grad_norm": 0.3753751516342163, + "learning_rate": 2.8976610555948862e-05, + "loss": 0.053722381591796875, + "step": 2202 + }, + { + "epoch": 0.14891172096796, + "grad_norm": 0.4980695843696594, + "learning_rate": 2.897541236046971e-05, + "loss": 0.06520843505859375, + "step": 2203 + }, + { + "epoch": 0.14897931593889416, + "grad_norm": 0.3373121917247772, + "learning_rate": 2.8974213488773196e-05, + "loss": 0.04217529296875, + "step": 2204 + }, + { + "epoch": 0.1490469109098283, + "grad_norm": 0.7026003003120422, + "learning_rate": 2.8973013940917328e-05, + "loss": 0.116058349609375, + "step": 2205 + }, + { + "epoch": 0.14911450588076247, + "grad_norm": 1.3009361028671265, + "learning_rate": 2.897181371696015e-05, + "loss": 0.161376953125, + "step": 2206 + }, + { + "epoch": 0.14918210085169664, + "grad_norm": 0.436474084854126, + "learning_rate": 2.8970612816959733e-05, + "loss": 0.0552215576171875, + "step": 2207 + }, + { + "epoch": 0.14924969582263078, + "grad_norm": 0.414931058883667, + "learning_rate": 2.896941124097419e-05, + "loss": 0.060810089111328125, + "step": 2208 + }, + { + "epoch": 0.14931729079356496, + "grad_norm": 0.5144734978675842, + "learning_rate": 2.8968208989061646e-05, + "loss": 0.13419342041015625, + "step": 2209 + }, + { + "epoch": 0.14938488576449913, + "grad_norm": 1.8154551982879639, + "learning_rate": 2.896700606128029e-05, + "loss": 0.22216796875, + "step": 2210 + }, + { + "epoch": 0.1494524807354333, + "grad_norm": 0.6554099917411804, + "learning_rate": 2.8965802457688322e-05, + "loss": 0.07172012329101562, + "step": 2211 + }, + { + "epoch": 0.14952007570636744, + "grad_norm": 1.52407968044281, + "learning_rate": 2.8964598178343978e-05, + "loss": 0.179290771484375, + "step": 2212 + }, + { + "epoch": 0.1495876706773016, + "grad_norm": 0.42883649468421936, + "learning_rate": 2.8963393223305527e-05, + "loss": 0.1041107177734375, + "step": 2213 + }, + { + "epoch": 0.14965526564823578, + "grad_norm": 0.6830030083656311, + "learning_rate": 2.8962187592631277e-05, + "loss": 0.1342926025390625, + "step": 2214 + }, + { + "epoch": 0.14972286061916992, + "grad_norm": 0.42995360493659973, + "learning_rate": 2.8960981286379563e-05, + "loss": 0.0481414794921875, + "step": 2215 + }, + { + "epoch": 0.1497904555901041, + "grad_norm": 0.24684499204158783, + "learning_rate": 2.895977430460875e-05, + "loss": 0.03257560729980469, + "step": 2216 + }, + { + "epoch": 0.14985805056103826, + "grad_norm": 2.051675319671631, + "learning_rate": 2.895856664737724e-05, + "loss": 0.227752685546875, + "step": 2217 + }, + { + "epoch": 0.14992564553197243, + "grad_norm": 0.3029772639274597, + "learning_rate": 2.8957358314743467e-05, + "loss": 0.0692138671875, + "step": 2218 + }, + { + "epoch": 0.14999324050290658, + "grad_norm": 1.164151906967163, + "learning_rate": 2.8956149306765904e-05, + "loss": 0.1678466796875, + "step": 2219 + }, + { + "epoch": 0.15006083547384075, + "grad_norm": 0.9721463918685913, + "learning_rate": 2.895493962350304e-05, + "loss": 0.210906982421875, + "step": 2220 + }, + { + "epoch": 0.15012843044477492, + "grad_norm": 0.9564982652664185, + "learning_rate": 2.8953729265013414e-05, + "loss": 0.150543212890625, + "step": 2221 + }, + { + "epoch": 0.15019602541570906, + "grad_norm": 0.5352941751480103, + "learning_rate": 2.895251823135559e-05, + "loss": 0.11241912841796875, + "step": 2222 + }, + { + "epoch": 0.15026362038664323, + "grad_norm": 0.3089677691459656, + "learning_rate": 2.8951306522588163e-05, + "loss": 0.052555084228515625, + "step": 2223 + }, + { + "epoch": 0.1503312153575774, + "grad_norm": 1.5125985145568848, + "learning_rate": 2.8950094138769763e-05, + "loss": 0.27813720703125, + "step": 2224 + }, + { + "epoch": 0.15039881032851155, + "grad_norm": 1.1368662118911743, + "learning_rate": 2.8948881079959054e-05, + "loss": 0.07079315185546875, + "step": 2225 + }, + { + "epoch": 0.15046640529944572, + "grad_norm": 0.9444746971130371, + "learning_rate": 2.8947667346214736e-05, + "loss": 0.20220947265625, + "step": 2226 + }, + { + "epoch": 0.1505340002703799, + "grad_norm": 1.1563022136688232, + "learning_rate": 2.8946452937595528e-05, + "loss": 0.1371307373046875, + "step": 2227 + }, + { + "epoch": 0.15060159524131406, + "grad_norm": 1.145633578300476, + "learning_rate": 2.8945237854160194e-05, + "loss": 0.17340087890625, + "step": 2228 + }, + { + "epoch": 0.1506691902122482, + "grad_norm": 2.357407569885254, + "learning_rate": 2.894402209596753e-05, + "loss": 0.1804351806640625, + "step": 2229 + }, + { + "epoch": 0.15073678518318237, + "grad_norm": 1.0520168542861938, + "learning_rate": 2.8942805663076362e-05, + "loss": 0.18548583984375, + "step": 2230 + }, + { + "epoch": 0.15080438015411654, + "grad_norm": 0.5662442445755005, + "learning_rate": 2.8941588555545544e-05, + "loss": 0.146942138671875, + "step": 2231 + }, + { + "epoch": 0.15087197512505068, + "grad_norm": 0.5615702271461487, + "learning_rate": 2.894037077343397e-05, + "loss": 0.08355712890625, + "step": 2232 + }, + { + "epoch": 0.15093957009598485, + "grad_norm": 0.6928112506866455, + "learning_rate": 2.8939152316800562e-05, + "loss": 0.13172531127929688, + "step": 2233 + }, + { + "epoch": 0.15100716506691902, + "grad_norm": 0.29284560680389404, + "learning_rate": 2.8937933185704282e-05, + "loss": 0.04680633544921875, + "step": 2234 + }, + { + "epoch": 0.1510747600378532, + "grad_norm": 2.9211931228637695, + "learning_rate": 2.8936713380204114e-05, + "loss": 0.2312469482421875, + "step": 2235 + }, + { + "epoch": 0.15114235500878734, + "grad_norm": 0.24856482446193695, + "learning_rate": 2.893549290035908e-05, + "loss": 0.04681396484375, + "step": 2236 + }, + { + "epoch": 0.1512099499797215, + "grad_norm": 2.2350270748138428, + "learning_rate": 2.893427174622824e-05, + "loss": 0.2048797607421875, + "step": 2237 + }, + { + "epoch": 0.15127754495065568, + "grad_norm": 0.29115262627601624, + "learning_rate": 2.8933049917870668e-05, + "loss": 0.04051017761230469, + "step": 2238 + }, + { + "epoch": 0.15134513992158982, + "grad_norm": 0.37117037177085876, + "learning_rate": 2.8931827415345496e-05, + "loss": 0.04508209228515625, + "step": 2239 + }, + { + "epoch": 0.151412734892524, + "grad_norm": 0.6943292021751404, + "learning_rate": 2.8930604238711877e-05, + "loss": 0.1562652587890625, + "step": 2240 + }, + { + "epoch": 0.15148032986345816, + "grad_norm": 0.6085984110832214, + "learning_rate": 2.8929380388028988e-05, + "loss": 0.1438140869140625, + "step": 2241 + }, + { + "epoch": 0.15154792483439233, + "grad_norm": 1.0462279319763184, + "learning_rate": 2.8928155863356047e-05, + "loss": 0.1587066650390625, + "step": 2242 + }, + { + "epoch": 0.15161551980532648, + "grad_norm": 0.27046018838882446, + "learning_rate": 2.8926930664752312e-05, + "loss": 0.03838348388671875, + "step": 2243 + }, + { + "epoch": 0.15168311477626065, + "grad_norm": 0.411173939704895, + "learning_rate": 2.892570479227706e-05, + "loss": 0.03106689453125, + "step": 2244 + }, + { + "epoch": 0.15175070974719482, + "grad_norm": 0.510261058807373, + "learning_rate": 2.8924478245989603e-05, + "loss": 0.128875732421875, + "step": 2245 + }, + { + "epoch": 0.15181830471812896, + "grad_norm": 0.6863893866539001, + "learning_rate": 2.89232510259493e-05, + "loss": 0.14995574951171875, + "step": 2246 + }, + { + "epoch": 0.15188589968906313, + "grad_norm": 0.7794633507728577, + "learning_rate": 2.892202313221552e-05, + "loss": 0.10446929931640625, + "step": 2247 + }, + { + "epoch": 0.1519534946599973, + "grad_norm": 0.468129426240921, + "learning_rate": 2.892079456484768e-05, + "loss": 0.1244354248046875, + "step": 2248 + }, + { + "epoch": 0.15202108963093147, + "grad_norm": 1.4348222017288208, + "learning_rate": 2.8919565323905227e-05, + "loss": 0.15615081787109375, + "step": 2249 + }, + { + "epoch": 0.15208868460186561, + "grad_norm": 0.23881377279758453, + "learning_rate": 2.8918335409447642e-05, + "loss": 0.038482666015625, + "step": 2250 + }, + { + "epoch": 0.15215627957279979, + "grad_norm": 0.6117674708366394, + "learning_rate": 2.891710482153443e-05, + "loss": 0.1152191162109375, + "step": 2251 + }, + { + "epoch": 0.15222387454373396, + "grad_norm": 1.399359941482544, + "learning_rate": 2.8915873560225136e-05, + "loss": 0.19482040405273438, + "step": 2252 + }, + { + "epoch": 0.1522914695146681, + "grad_norm": 1.0106157064437866, + "learning_rate": 2.8914641625579342e-05, + "loss": 0.23004150390625, + "step": 2253 + }, + { + "epoch": 0.15235906448560227, + "grad_norm": 1.020449161529541, + "learning_rate": 2.8913409017656644e-05, + "loss": 0.235931396484375, + "step": 2254 + }, + { + "epoch": 0.15242665945653644, + "grad_norm": 0.2827860116958618, + "learning_rate": 2.89121757365167e-05, + "loss": 0.07037353515625, + "step": 2255 + }, + { + "epoch": 0.15249425442747058, + "grad_norm": 0.5086636543273926, + "learning_rate": 2.8910941782219172e-05, + "loss": 0.06272125244140625, + "step": 2256 + }, + { + "epoch": 0.15256184939840475, + "grad_norm": 0.2832607626914978, + "learning_rate": 2.8909707154823767e-05, + "loss": 0.0642547607421875, + "step": 2257 + }, + { + "epoch": 0.15262944436933892, + "grad_norm": 0.3858392834663391, + "learning_rate": 2.8908471854390225e-05, + "loss": 0.1142730712890625, + "step": 2258 + }, + { + "epoch": 0.1526970393402731, + "grad_norm": 0.9329739809036255, + "learning_rate": 2.8907235880978324e-05, + "loss": 0.173095703125, + "step": 2259 + }, + { + "epoch": 0.15276463431120724, + "grad_norm": 0.5345145463943481, + "learning_rate": 2.890599923464786e-05, + "loss": 0.136077880859375, + "step": 2260 + }, + { + "epoch": 0.1528322292821414, + "grad_norm": 1.8279906511306763, + "learning_rate": 2.8904761915458672e-05, + "loss": 0.24932861328125, + "step": 2261 + }, + { + "epoch": 0.15289982425307558, + "grad_norm": 0.7707806825637817, + "learning_rate": 2.890352392347063e-05, + "loss": 0.14801025390625, + "step": 2262 + }, + { + "epoch": 0.15296741922400972, + "grad_norm": 1.0229312181472778, + "learning_rate": 2.8902285258743634e-05, + "loss": 0.228485107421875, + "step": 2263 + }, + { + "epoch": 0.1530350141949439, + "grad_norm": 0.41879987716674805, + "learning_rate": 2.8901045921337625e-05, + "loss": 0.07500839233398438, + "step": 2264 + }, + { + "epoch": 0.15310260916587806, + "grad_norm": 1.4341741800308228, + "learning_rate": 2.889980591131256e-05, + "loss": 0.242645263671875, + "step": 2265 + }, + { + "epoch": 0.15317020413681223, + "grad_norm": 0.4704703390598297, + "learning_rate": 2.8898565228728443e-05, + "loss": 0.064544677734375, + "step": 2266 + }, + { + "epoch": 0.15323779910774638, + "grad_norm": 0.8841524720191956, + "learning_rate": 2.889732387364531e-05, + "loss": 0.16827392578125, + "step": 2267 + }, + { + "epoch": 0.15330539407868055, + "grad_norm": 0.5110467672348022, + "learning_rate": 2.889608184612322e-05, + "loss": 0.0858154296875, + "step": 2268 + }, + { + "epoch": 0.15337298904961472, + "grad_norm": 0.8743040561676025, + "learning_rate": 2.889483914622227e-05, + "loss": 0.131134033203125, + "step": 2269 + }, + { + "epoch": 0.15344058402054886, + "grad_norm": 0.33586618304252625, + "learning_rate": 2.889359577400259e-05, + "loss": 0.04943084716796875, + "step": 2270 + }, + { + "epoch": 0.15350817899148303, + "grad_norm": 0.8619605302810669, + "learning_rate": 2.8892351729524345e-05, + "loss": 0.15896224975585938, + "step": 2271 + }, + { + "epoch": 0.1535757739624172, + "grad_norm": 1.488247275352478, + "learning_rate": 2.8891107012847726e-05, + "loss": 0.173431396484375, + "step": 2272 + }, + { + "epoch": 0.15364336893335137, + "grad_norm": 1.245820164680481, + "learning_rate": 2.8889861624032966e-05, + "loss": 0.267303466796875, + "step": 2273 + }, + { + "epoch": 0.1537109639042855, + "grad_norm": 0.388200044631958, + "learning_rate": 2.8888615563140318e-05, + "loss": 0.09522247314453125, + "step": 2274 + }, + { + "epoch": 0.15377855887521968, + "grad_norm": 0.18474867939949036, + "learning_rate": 2.8887368830230073e-05, + "loss": 0.0270538330078125, + "step": 2275 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 0.8692529201507568, + "learning_rate": 2.8886121425362564e-05, + "loss": 0.1343536376953125, + "step": 2276 + }, + { + "epoch": 0.153913748817088, + "grad_norm": 1.4730734825134277, + "learning_rate": 2.8884873348598146e-05, + "loss": 0.204437255859375, + "step": 2277 + }, + { + "epoch": 0.15398134378802217, + "grad_norm": 0.6822683215141296, + "learning_rate": 2.88836245999972e-05, + "loss": 0.1361083984375, + "step": 2278 + }, + { + "epoch": 0.15404893875895634, + "grad_norm": 1.6155258417129517, + "learning_rate": 2.8882375179620155e-05, + "loss": 0.188873291015625, + "step": 2279 + }, + { + "epoch": 0.1541165337298905, + "grad_norm": 1.0192159414291382, + "learning_rate": 2.8881125087527467e-05, + "loss": 0.145599365234375, + "step": 2280 + }, + { + "epoch": 0.15418412870082465, + "grad_norm": 0.6292599439620972, + "learning_rate": 2.8879874323779625e-05, + "loss": 0.13608932495117188, + "step": 2281 + }, + { + "epoch": 0.15425172367175882, + "grad_norm": 1.1392581462860107, + "learning_rate": 2.887862288843714e-05, + "loss": 0.1710052490234375, + "step": 2282 + }, + { + "epoch": 0.154319318642693, + "grad_norm": 0.7602350115776062, + "learning_rate": 2.8877370781560574e-05, + "loss": 0.08066558837890625, + "step": 2283 + }, + { + "epoch": 0.15438691361362714, + "grad_norm": 0.4232594072818756, + "learning_rate": 2.8876118003210504e-05, + "loss": 0.0468292236328125, + "step": 2284 + }, + { + "epoch": 0.1544545085845613, + "grad_norm": 0.22276131808757782, + "learning_rate": 2.8874864553447552e-05, + "loss": 0.047466278076171875, + "step": 2285 + }, + { + "epoch": 0.15452210355549548, + "grad_norm": 1.1989558935165405, + "learning_rate": 2.8873610432332364e-05, + "loss": 0.1642608642578125, + "step": 2286 + }, + { + "epoch": 0.15458969852642965, + "grad_norm": 0.19707322120666504, + "learning_rate": 2.8872355639925628e-05, + "loss": 0.02648162841796875, + "step": 2287 + }, + { + "epoch": 0.1546572934973638, + "grad_norm": 1.4375861883163452, + "learning_rate": 2.887110017628805e-05, + "loss": 0.14441680908203125, + "step": 2288 + }, + { + "epoch": 0.15472488846829796, + "grad_norm": 1.5840739011764526, + "learning_rate": 2.8869844041480382e-05, + "loss": 0.1539764404296875, + "step": 2289 + }, + { + "epoch": 0.15479248343923213, + "grad_norm": 0.6056463122367859, + "learning_rate": 2.8868587235563408e-05, + "loss": 0.05133056640625, + "step": 2290 + }, + { + "epoch": 0.15486007841016627, + "grad_norm": 1.416520357131958, + "learning_rate": 2.8867329758597934e-05, + "loss": 0.165252685546875, + "step": 2291 + }, + { + "epoch": 0.15492767338110044, + "grad_norm": 0.46484389901161194, + "learning_rate": 2.8866071610644807e-05, + "loss": 0.037441253662109375, + "step": 2292 + }, + { + "epoch": 0.15499526835203462, + "grad_norm": 0.8536309003829956, + "learning_rate": 2.88648127917649e-05, + "loss": 0.11641693115234375, + "step": 2293 + }, + { + "epoch": 0.15506286332296876, + "grad_norm": 0.4248034656047821, + "learning_rate": 2.886355330201913e-05, + "loss": 0.0825958251953125, + "step": 2294 + }, + { + "epoch": 0.15513045829390293, + "grad_norm": 1.1118344068527222, + "learning_rate": 2.886229314146843e-05, + "loss": 0.12100982666015625, + "step": 2295 + }, + { + "epoch": 0.1551980532648371, + "grad_norm": 1.0182454586029053, + "learning_rate": 2.8861032310173783e-05, + "loss": 0.1237640380859375, + "step": 2296 + }, + { + "epoch": 0.15526564823577127, + "grad_norm": 0.7044995427131653, + "learning_rate": 2.885977080819619e-05, + "loss": 0.10107421875, + "step": 2297 + }, + { + "epoch": 0.1553332432067054, + "grad_norm": 0.5202237963676453, + "learning_rate": 2.8858508635596695e-05, + "loss": 0.058135986328125, + "step": 2298 + }, + { + "epoch": 0.15540083817763958, + "grad_norm": 0.8782653212547302, + "learning_rate": 2.8857245792436363e-05, + "loss": 0.1083221435546875, + "step": 2299 + }, + { + "epoch": 0.15546843314857375, + "grad_norm": 0.735152542591095, + "learning_rate": 2.8855982278776304e-05, + "loss": 0.20782470703125, + "step": 2300 + }, + { + "epoch": 0.1555360281195079, + "grad_norm": 0.9407157897949219, + "learning_rate": 2.8854718094677656e-05, + "loss": 0.14276885986328125, + "step": 2301 + }, + { + "epoch": 0.15560362309044207, + "grad_norm": 0.29726171493530273, + "learning_rate": 2.8853453240201582e-05, + "loss": 0.038486480712890625, + "step": 2302 + }, + { + "epoch": 0.15567121806137624, + "grad_norm": 0.8136546611785889, + "learning_rate": 2.8852187715409285e-05, + "loss": 0.14762115478515625, + "step": 2303 + }, + { + "epoch": 0.1557388130323104, + "grad_norm": 1.1582436561584473, + "learning_rate": 2.8850921520362002e-05, + "loss": 0.200653076171875, + "step": 2304 + }, + { + "epoch": 0.15580640800324455, + "grad_norm": 0.792892575263977, + "learning_rate": 2.8849654655121e-05, + "loss": 0.1297607421875, + "step": 2305 + }, + { + "epoch": 0.15587400297417872, + "grad_norm": 0.7192087769508362, + "learning_rate": 2.8848387119747573e-05, + "loss": 0.0894775390625, + "step": 2306 + }, + { + "epoch": 0.1559415979451129, + "grad_norm": 0.357587605714798, + "learning_rate": 2.8847118914303057e-05, + "loss": 0.081787109375, + "step": 2307 + }, + { + "epoch": 0.15600919291604703, + "grad_norm": 1.3200105428695679, + "learning_rate": 2.884585003884881e-05, + "loss": 0.137664794921875, + "step": 2308 + }, + { + "epoch": 0.1560767878869812, + "grad_norm": 0.9458848834037781, + "learning_rate": 2.8844580493446237e-05, + "loss": 0.15087890625, + "step": 2309 + }, + { + "epoch": 0.15614438285791538, + "grad_norm": 0.4710046648979187, + "learning_rate": 2.8843310278156756e-05, + "loss": 0.0486907958984375, + "step": 2310 + }, + { + "epoch": 0.15621197782884955, + "grad_norm": 0.9474268555641174, + "learning_rate": 2.8842039393041837e-05, + "loss": 0.13195037841796875, + "step": 2311 + }, + { + "epoch": 0.1562795727997837, + "grad_norm": 0.8978335857391357, + "learning_rate": 2.8840767838162967e-05, + "loss": 0.14324569702148438, + "step": 2312 + }, + { + "epoch": 0.15634716777071786, + "grad_norm": 1.1727746725082397, + "learning_rate": 2.8839495613581675e-05, + "loss": 0.14653778076171875, + "step": 2313 + }, + { + "epoch": 0.15641476274165203, + "grad_norm": 0.7003268599510193, + "learning_rate": 2.883822271935952e-05, + "loss": 0.10651397705078125, + "step": 2314 + }, + { + "epoch": 0.15648235771258617, + "grad_norm": 0.3861343562602997, + "learning_rate": 2.8836949155558088e-05, + "loss": 0.03873443603515625, + "step": 2315 + }, + { + "epoch": 0.15654995268352034, + "grad_norm": 1.39543879032135, + "learning_rate": 2.8835674922239005e-05, + "loss": 0.2764892578125, + "step": 2316 + }, + { + "epoch": 0.15661754765445451, + "grad_norm": 0.8311353921890259, + "learning_rate": 2.883440001946393e-05, + "loss": 0.118804931640625, + "step": 2317 + }, + { + "epoch": 0.15668514262538868, + "grad_norm": 1.0580400228500366, + "learning_rate": 2.8833124447294544e-05, + "loss": 0.19028091430664062, + "step": 2318 + }, + { + "epoch": 0.15675273759632283, + "grad_norm": 0.7690226435661316, + "learning_rate": 2.883184820579257e-05, + "loss": 0.10952377319335938, + "step": 2319 + }, + { + "epoch": 0.156820332567257, + "grad_norm": 0.9812389612197876, + "learning_rate": 2.883057129501976e-05, + "loss": 0.11652374267578125, + "step": 2320 + }, + { + "epoch": 0.15688792753819117, + "grad_norm": 1.5508402585983276, + "learning_rate": 2.8829293715037902e-05, + "loss": 0.17474365234375, + "step": 2321 + }, + { + "epoch": 0.1569555225091253, + "grad_norm": 2.8482818603515625, + "learning_rate": 2.8828015465908806e-05, + "loss": 0.219268798828125, + "step": 2322 + }, + { + "epoch": 0.15702311748005948, + "grad_norm": 0.8113706707954407, + "learning_rate": 2.882673654769433e-05, + "loss": 0.0796356201171875, + "step": 2323 + }, + { + "epoch": 0.15709071245099365, + "grad_norm": 0.48536092042922974, + "learning_rate": 2.882545696045635e-05, + "loss": 0.0990142822265625, + "step": 2324 + }, + { + "epoch": 0.1571583074219278, + "grad_norm": 2.711836814880371, + "learning_rate": 2.8824176704256784e-05, + "loss": 0.2205657958984375, + "step": 2325 + }, + { + "epoch": 0.15722590239286197, + "grad_norm": 1.0444188117980957, + "learning_rate": 2.8822895779157576e-05, + "loss": 0.13983154296875, + "step": 2326 + }, + { + "epoch": 0.15729349736379614, + "grad_norm": 1.4323351383209229, + "learning_rate": 2.882161418522071e-05, + "loss": 0.253631591796875, + "step": 2327 + }, + { + "epoch": 0.1573610923347303, + "grad_norm": 0.7028105854988098, + "learning_rate": 2.8820331922508188e-05, + "loss": 0.143524169921875, + "step": 2328 + }, + { + "epoch": 0.15742868730566445, + "grad_norm": 1.0254281759262085, + "learning_rate": 2.8819048991082068e-05, + "loss": 0.13221168518066406, + "step": 2329 + }, + { + "epoch": 0.15749628227659862, + "grad_norm": 0.9141889810562134, + "learning_rate": 2.8817765391004413e-05, + "loss": 0.1268157958984375, + "step": 2330 + }, + { + "epoch": 0.1575638772475328, + "grad_norm": 0.4066772162914276, + "learning_rate": 2.8816481122337333e-05, + "loss": 0.06573486328125, + "step": 2331 + }, + { + "epoch": 0.15763147221846693, + "grad_norm": 2.344163179397583, + "learning_rate": 2.881519618514298e-05, + "loss": 0.1944732666015625, + "step": 2332 + }, + { + "epoch": 0.1576990671894011, + "grad_norm": 0.8732911944389343, + "learning_rate": 2.8813910579483516e-05, + "loss": 0.16094970703125, + "step": 2333 + }, + { + "epoch": 0.15776666216033527, + "grad_norm": 1.3419253826141357, + "learning_rate": 2.8812624305421153e-05, + "loss": 0.228118896484375, + "step": 2334 + }, + { + "epoch": 0.15783425713126945, + "grad_norm": 0.41984841227531433, + "learning_rate": 2.8811337363018122e-05, + "loss": 0.089874267578125, + "step": 2335 + }, + { + "epoch": 0.1579018521022036, + "grad_norm": 0.3605487048625946, + "learning_rate": 2.88100497523367e-05, + "loss": 0.058521270751953125, + "step": 2336 + }, + { + "epoch": 0.15796944707313776, + "grad_norm": 0.8390972018241882, + "learning_rate": 2.880876147343919e-05, + "loss": 0.172210693359375, + "step": 2337 + }, + { + "epoch": 0.15803704204407193, + "grad_norm": 0.41068723797798157, + "learning_rate": 2.8807472526387915e-05, + "loss": 0.047321319580078125, + "step": 2338 + }, + { + "epoch": 0.15810463701500607, + "grad_norm": 1.4153536558151245, + "learning_rate": 2.880618291124526e-05, + "loss": 0.1761627197265625, + "step": 2339 + }, + { + "epoch": 0.15817223198594024, + "grad_norm": 1.988095760345459, + "learning_rate": 2.8804892628073617e-05, + "loss": 0.153717041015625, + "step": 2340 + }, + { + "epoch": 0.1582398269568744, + "grad_norm": 1.1335680484771729, + "learning_rate": 2.8803601676935415e-05, + "loss": 0.1443328857421875, + "step": 2341 + }, + { + "epoch": 0.15830742192780858, + "grad_norm": 0.2368239462375641, + "learning_rate": 2.8802310057893117e-05, + "loss": 0.029638290405273438, + "step": 2342 + }, + { + "epoch": 0.15837501689874273, + "grad_norm": 0.3257851302623749, + "learning_rate": 2.8801017771009224e-05, + "loss": 0.031280517578125, + "step": 2343 + }, + { + "epoch": 0.1584426118696769, + "grad_norm": 1.2960132360458374, + "learning_rate": 2.8799724816346262e-05, + "loss": 0.13842010498046875, + "step": 2344 + }, + { + "epoch": 0.15851020684061107, + "grad_norm": 0.6421251893043518, + "learning_rate": 2.8798431193966798e-05, + "loss": 0.06158447265625, + "step": 2345 + }, + { + "epoch": 0.1585778018115452, + "grad_norm": 0.6750441193580627, + "learning_rate": 2.879713690393342e-05, + "loss": 0.10789108276367188, + "step": 2346 + }, + { + "epoch": 0.15864539678247938, + "grad_norm": 1.1772265434265137, + "learning_rate": 2.879584194630875e-05, + "loss": 0.2058563232421875, + "step": 2347 + }, + { + "epoch": 0.15871299175341355, + "grad_norm": 0.9956703782081604, + "learning_rate": 2.8794546321155462e-05, + "loss": 0.1558380126953125, + "step": 2348 + }, + { + "epoch": 0.15878058672434772, + "grad_norm": 0.48126164078712463, + "learning_rate": 2.879325002853623e-05, + "loss": 0.10479736328125, + "step": 2349 + }, + { + "epoch": 0.15884818169528186, + "grad_norm": 0.6243741512298584, + "learning_rate": 2.8791953068513777e-05, + "loss": 0.12713623046875, + "step": 2350 + }, + { + "epoch": 0.15891577666621604, + "grad_norm": 0.6097108721733093, + "learning_rate": 2.879065544115087e-05, + "loss": 0.164520263671875, + "step": 2351 + }, + { + "epoch": 0.1589833716371502, + "grad_norm": 0.5818207859992981, + "learning_rate": 2.8789357146510288e-05, + "loss": 0.0907440185546875, + "step": 2352 + }, + { + "epoch": 0.15905096660808435, + "grad_norm": 0.40453237295150757, + "learning_rate": 2.8788058184654848e-05, + "loss": 0.110870361328125, + "step": 2353 + }, + { + "epoch": 0.15911856157901852, + "grad_norm": 0.8601555228233337, + "learning_rate": 2.878675855564741e-05, + "loss": 0.12374305725097656, + "step": 2354 + }, + { + "epoch": 0.1591861565499527, + "grad_norm": 0.8228806257247925, + "learning_rate": 2.8785458259550854e-05, + "loss": 0.13267898559570312, + "step": 2355 + }, + { + "epoch": 0.15925375152088683, + "grad_norm": 0.31095078587532043, + "learning_rate": 2.8784157296428098e-05, + "loss": 0.043354034423828125, + "step": 2356 + }, + { + "epoch": 0.159321346491821, + "grad_norm": 0.7243711948394775, + "learning_rate": 2.878285566634209e-05, + "loss": 0.096343994140625, + "step": 2357 + }, + { + "epoch": 0.15938894146275517, + "grad_norm": 0.5019193887710571, + "learning_rate": 2.8781553369355805e-05, + "loss": 0.08367538452148438, + "step": 2358 + }, + { + "epoch": 0.15945653643368934, + "grad_norm": 0.42630061507225037, + "learning_rate": 2.8780250405532266e-05, + "loss": 0.05762481689453125, + "step": 2359 + }, + { + "epoch": 0.1595241314046235, + "grad_norm": 0.867561399936676, + "learning_rate": 2.877894677493451e-05, + "loss": 0.1833953857421875, + "step": 2360 + }, + { + "epoch": 0.15959172637555766, + "grad_norm": 0.4641646444797516, + "learning_rate": 2.8777642477625625e-05, + "loss": 0.06414031982421875, + "step": 2361 + }, + { + "epoch": 0.15965932134649183, + "grad_norm": 0.3385358452796936, + "learning_rate": 2.877633751366871e-05, + "loss": 0.07120132446289062, + "step": 2362 + }, + { + "epoch": 0.15972691631742597, + "grad_norm": 0.5724732875823975, + "learning_rate": 2.8775031883126908e-05, + "loss": 0.1340179443359375, + "step": 2363 + }, + { + "epoch": 0.15979451128836014, + "grad_norm": 0.45793116092681885, + "learning_rate": 2.87737255860634e-05, + "loss": 0.0699005126953125, + "step": 2364 + }, + { + "epoch": 0.1598621062592943, + "grad_norm": 1.32863187789917, + "learning_rate": 2.8772418622541395e-05, + "loss": 0.13421249389648438, + "step": 2365 + }, + { + "epoch": 0.15992970123022848, + "grad_norm": 0.7582252621650696, + "learning_rate": 2.8771110992624125e-05, + "loss": 0.1448822021484375, + "step": 2366 + }, + { + "epoch": 0.15999729620116263, + "grad_norm": 1.3145298957824707, + "learning_rate": 2.8769802696374865e-05, + "loss": 0.16075897216796875, + "step": 2367 + }, + { + "epoch": 0.1600648911720968, + "grad_norm": 0.6094275712966919, + "learning_rate": 2.8768493733856916e-05, + "loss": 0.09935760498046875, + "step": 2368 + }, + { + "epoch": 0.16013248614303097, + "grad_norm": 3.2814042568206787, + "learning_rate": 2.8767184105133612e-05, + "loss": 0.35284423828125, + "step": 2369 + }, + { + "epoch": 0.1602000811139651, + "grad_norm": 0.5027456879615784, + "learning_rate": 2.8765873810268327e-05, + "loss": 0.1075439453125, + "step": 2370 + }, + { + "epoch": 0.16026767608489928, + "grad_norm": 0.5243725776672363, + "learning_rate": 2.8764562849324454e-05, + "loss": 0.11247634887695312, + "step": 2371 + }, + { + "epoch": 0.16033527105583345, + "grad_norm": 0.17394132912158966, + "learning_rate": 2.8763251222365433e-05, + "loss": 0.03799629211425781, + "step": 2372 + }, + { + "epoch": 0.16040286602676762, + "grad_norm": 0.2720343768596649, + "learning_rate": 2.8761938929454722e-05, + "loss": 0.04047393798828125, + "step": 2373 + }, + { + "epoch": 0.16047046099770176, + "grad_norm": 0.6863456964492798, + "learning_rate": 2.8760625970655826e-05, + "loss": 0.11676025390625, + "step": 2374 + }, + { + "epoch": 0.16053805596863593, + "grad_norm": 1.7314949035644531, + "learning_rate": 2.875931234603227e-05, + "loss": 0.26104736328125, + "step": 2375 + }, + { + "epoch": 0.1606056509395701, + "grad_norm": 0.8952198028564453, + "learning_rate": 2.8757998055647608e-05, + "loss": 0.117218017578125, + "step": 2376 + }, + { + "epoch": 0.16067324591050425, + "grad_norm": 1.1484028100967407, + "learning_rate": 2.8756683099565445e-05, + "loss": 0.178924560546875, + "step": 2377 + }, + { + "epoch": 0.16074084088143842, + "grad_norm": 0.47632354497909546, + "learning_rate": 2.8755367477849398e-05, + "loss": 0.1004638671875, + "step": 2378 + }, + { + "epoch": 0.1608084358523726, + "grad_norm": 0.7085027694702148, + "learning_rate": 2.875405119056313e-05, + "loss": 0.1466522216796875, + "step": 2379 + }, + { + "epoch": 0.16087603082330676, + "grad_norm": 0.9910482168197632, + "learning_rate": 2.8752734237770327e-05, + "loss": 0.0907135009765625, + "step": 2380 + }, + { + "epoch": 0.1609436257942409, + "grad_norm": 0.19949166476726532, + "learning_rate": 2.875141661953472e-05, + "loss": 0.028270721435546875, + "step": 2381 + }, + { + "epoch": 0.16101122076517507, + "grad_norm": 0.6367469429969788, + "learning_rate": 2.8750098335920055e-05, + "loss": 0.1226043701171875, + "step": 2382 + }, + { + "epoch": 0.16107881573610924, + "grad_norm": 0.8655010461807251, + "learning_rate": 2.8748779386990123e-05, + "loss": 0.0861968994140625, + "step": 2383 + }, + { + "epoch": 0.16114641070704339, + "grad_norm": 0.9464244842529297, + "learning_rate": 2.8747459772808738e-05, + "loss": 0.13924407958984375, + "step": 2384 + }, + { + "epoch": 0.16121400567797756, + "grad_norm": 0.1602489948272705, + "learning_rate": 2.8746139493439757e-05, + "loss": 0.027246475219726562, + "step": 2385 + }, + { + "epoch": 0.16128160064891173, + "grad_norm": 0.18088583648204803, + "learning_rate": 2.8744818548947063e-05, + "loss": 0.035400390625, + "step": 2386 + }, + { + "epoch": 0.16134919561984587, + "grad_norm": 0.935975968837738, + "learning_rate": 2.8743496939394568e-05, + "loss": 0.19207763671875, + "step": 2387 + }, + { + "epoch": 0.16141679059078004, + "grad_norm": 1.3190628290176392, + "learning_rate": 2.874217466484622e-05, + "loss": 0.1962890625, + "step": 2388 + }, + { + "epoch": 0.1614843855617142, + "grad_norm": 0.468075692653656, + "learning_rate": 2.8740851725366003e-05, + "loss": 0.08856201171875, + "step": 2389 + }, + { + "epoch": 0.16155198053264838, + "grad_norm": 1.7691608667373657, + "learning_rate": 2.8739528121017923e-05, + "loss": 0.2574462890625, + "step": 2390 + }, + { + "epoch": 0.16161957550358252, + "grad_norm": 0.36737513542175293, + "learning_rate": 2.8738203851866026e-05, + "loss": 0.0828094482421875, + "step": 2391 + }, + { + "epoch": 0.1616871704745167, + "grad_norm": 1.3680835962295532, + "learning_rate": 2.8736878917974395e-05, + "loss": 0.1939697265625, + "step": 2392 + }, + { + "epoch": 0.16175476544545087, + "grad_norm": 1.207741141319275, + "learning_rate": 2.8735553319407127e-05, + "loss": 0.15411376953125, + "step": 2393 + }, + { + "epoch": 0.161822360416385, + "grad_norm": 0.4098483920097351, + "learning_rate": 2.8734227056228372e-05, + "loss": 0.10199737548828125, + "step": 2394 + }, + { + "epoch": 0.16188995538731918, + "grad_norm": 0.6997787952423096, + "learning_rate": 2.8732900128502302e-05, + "loss": 0.12213134765625, + "step": 2395 + }, + { + "epoch": 0.16195755035825335, + "grad_norm": 0.866203248500824, + "learning_rate": 2.8731572536293115e-05, + "loss": 0.13048744201660156, + "step": 2396 + }, + { + "epoch": 0.16202514532918752, + "grad_norm": 0.22485123574733734, + "learning_rate": 2.8730244279665056e-05, + "loss": 0.03524017333984375, + "step": 2397 + }, + { + "epoch": 0.16209274030012166, + "grad_norm": 0.8412677645683289, + "learning_rate": 2.8728915358682393e-05, + "loss": 0.12109375, + "step": 2398 + }, + { + "epoch": 0.16216033527105583, + "grad_norm": 0.6490027904510498, + "learning_rate": 2.8727585773409424e-05, + "loss": 0.123382568359375, + "step": 2399 + }, + { + "epoch": 0.16222793024199, + "grad_norm": 0.5669684410095215, + "learning_rate": 2.872625552391048e-05, + "loss": 0.07846832275390625, + "step": 2400 + }, + { + "epoch": 0.16229552521292415, + "grad_norm": 0.41074585914611816, + "learning_rate": 2.8724924610249938e-05, + "loss": 0.079437255859375, + "step": 2401 + }, + { + "epoch": 0.16236312018385832, + "grad_norm": 0.605832040309906, + "learning_rate": 2.8723593032492185e-05, + "loss": 0.1196441650390625, + "step": 2402 + }, + { + "epoch": 0.1624307151547925, + "grad_norm": 0.7880710959434509, + "learning_rate": 2.8722260790701656e-05, + "loss": 0.12097930908203125, + "step": 2403 + }, + { + "epoch": 0.16249831012572666, + "grad_norm": 1.3447692394256592, + "learning_rate": 2.872092788494281e-05, + "loss": 0.2108154296875, + "step": 2404 + }, + { + "epoch": 0.1625659050966608, + "grad_norm": 1.071844458580017, + "learning_rate": 2.8719594315280143e-05, + "loss": 0.13605499267578125, + "step": 2405 + }, + { + "epoch": 0.16263350006759497, + "grad_norm": 0.4659883379936218, + "learning_rate": 2.8718260081778188e-05, + "loss": 0.10491943359375, + "step": 2406 + }, + { + "epoch": 0.16270109503852914, + "grad_norm": 0.5247354507446289, + "learning_rate": 2.8716925184501494e-05, + "loss": 0.09123992919921875, + "step": 2407 + }, + { + "epoch": 0.16276869000946328, + "grad_norm": 0.43269792199134827, + "learning_rate": 2.8715589623514654e-05, + "loss": 0.0631866455078125, + "step": 2408 + }, + { + "epoch": 0.16283628498039746, + "grad_norm": 1.0861543416976929, + "learning_rate": 2.8714253398882294e-05, + "loss": 0.0944976806640625, + "step": 2409 + }, + { + "epoch": 0.16290387995133163, + "grad_norm": 0.6086909770965576, + "learning_rate": 2.871291651066906e-05, + "loss": 0.10201263427734375, + "step": 2410 + }, + { + "epoch": 0.1629714749222658, + "grad_norm": 1.7357710599899292, + "learning_rate": 2.871157895893965e-05, + "loss": 0.197235107421875, + "step": 2411 + }, + { + "epoch": 0.16303906989319994, + "grad_norm": 0.25320419669151306, + "learning_rate": 2.871024074375878e-05, + "loss": 0.0546417236328125, + "step": 2412 + }, + { + "epoch": 0.1631066648641341, + "grad_norm": 0.9515103101730347, + "learning_rate": 2.8708901865191197e-05, + "loss": 0.19085693359375, + "step": 2413 + }, + { + "epoch": 0.16317425983506828, + "grad_norm": 0.3311793804168701, + "learning_rate": 2.8707562323301694e-05, + "loss": 0.07733917236328125, + "step": 2414 + }, + { + "epoch": 0.16324185480600242, + "grad_norm": 1.4841445684432983, + "learning_rate": 2.8706222118155072e-05, + "loss": 0.187774658203125, + "step": 2415 + }, + { + "epoch": 0.1633094497769366, + "grad_norm": 0.9560360908508301, + "learning_rate": 2.8704881249816188e-05, + "loss": 0.13188743591308594, + "step": 2416 + }, + { + "epoch": 0.16337704474787076, + "grad_norm": 0.41054239869117737, + "learning_rate": 2.8703539718349917e-05, + "loss": 0.050891876220703125, + "step": 2417 + }, + { + "epoch": 0.1634446397188049, + "grad_norm": 1.0912221670150757, + "learning_rate": 2.8702197523821176e-05, + "loss": 0.1775360107421875, + "step": 2418 + }, + { + "epoch": 0.16351223468973908, + "grad_norm": 0.5856747627258301, + "learning_rate": 2.8700854666294906e-05, + "loss": 0.05916595458984375, + "step": 2419 + }, + { + "epoch": 0.16357982966067325, + "grad_norm": 1.048736572265625, + "learning_rate": 2.869951114583608e-05, + "loss": 0.1611175537109375, + "step": 2420 + }, + { + "epoch": 0.16364742463160742, + "grad_norm": 1.3410217761993408, + "learning_rate": 2.8698166962509707e-05, + "loss": 0.17218017578125, + "step": 2421 + }, + { + "epoch": 0.16371501960254156, + "grad_norm": 1.1311254501342773, + "learning_rate": 2.8696822116380836e-05, + "loss": 0.1439208984375, + "step": 2422 + }, + { + "epoch": 0.16378261457347573, + "grad_norm": 0.4221915304660797, + "learning_rate": 2.8695476607514525e-05, + "loss": 0.0429840087890625, + "step": 2423 + }, + { + "epoch": 0.1638502095444099, + "grad_norm": 1.972812294960022, + "learning_rate": 2.8694130435975883e-05, + "loss": 0.200408935546875, + "step": 2424 + }, + { + "epoch": 0.16391780451534405, + "grad_norm": 2.8900630474090576, + "learning_rate": 2.8692783601830056e-05, + "loss": 0.2862548828125, + "step": 2425 + }, + { + "epoch": 0.16398539948627822, + "grad_norm": 0.7844040393829346, + "learning_rate": 2.8691436105142196e-05, + "loss": 0.17426300048828125, + "step": 2426 + }, + { + "epoch": 0.1640529944572124, + "grad_norm": 0.6907907724380493, + "learning_rate": 2.869008794597751e-05, + "loss": 0.13714599609375, + "step": 2427 + }, + { + "epoch": 0.16412058942814656, + "grad_norm": 0.4500443935394287, + "learning_rate": 2.8688739124401237e-05, + "loss": 0.0642242431640625, + "step": 2428 + }, + { + "epoch": 0.1641881843990807, + "grad_norm": 0.9148606657981873, + "learning_rate": 2.8687389640478633e-05, + "loss": 0.116058349609375, + "step": 2429 + }, + { + "epoch": 0.16425577937001487, + "grad_norm": 0.6907846927642822, + "learning_rate": 2.8686039494275e-05, + "loss": 0.061641693115234375, + "step": 2430 + }, + { + "epoch": 0.16432337434094904, + "grad_norm": 0.5318291783332825, + "learning_rate": 2.8684688685855663e-05, + "loss": 0.047916412353515625, + "step": 2431 + }, + { + "epoch": 0.16439096931188318, + "grad_norm": 1.2883273363113403, + "learning_rate": 2.868333721528598e-05, + "loss": 0.27471923828125, + "step": 2432 + }, + { + "epoch": 0.16445856428281735, + "grad_norm": 1.294530987739563, + "learning_rate": 2.868198508263135e-05, + "loss": 0.1795654296875, + "step": 2433 + }, + { + "epoch": 0.16452615925375152, + "grad_norm": 1.2005032300949097, + "learning_rate": 2.868063228795719e-05, + "loss": 0.15107345581054688, + "step": 2434 + }, + { + "epoch": 0.1645937542246857, + "grad_norm": 0.5736878514289856, + "learning_rate": 2.8679278831328965e-05, + "loss": 0.062225341796875, + "step": 2435 + }, + { + "epoch": 0.16466134919561984, + "grad_norm": 0.4538261294364929, + "learning_rate": 2.867792471281216e-05, + "loss": 0.0855712890625, + "step": 2436 + }, + { + "epoch": 0.164728944166554, + "grad_norm": 1.5788655281066895, + "learning_rate": 2.8676569932472298e-05, + "loss": 0.25506591796875, + "step": 2437 + }, + { + "epoch": 0.16479653913748818, + "grad_norm": 0.5704379081726074, + "learning_rate": 2.867521449037492e-05, + "loss": 0.11431884765625, + "step": 2438 + }, + { + "epoch": 0.16486413410842232, + "grad_norm": 0.8484100699424744, + "learning_rate": 2.8673858386585626e-05, + "loss": 0.08650970458984375, + "step": 2439 + }, + { + "epoch": 0.1649317290793565, + "grad_norm": 0.497234970331192, + "learning_rate": 2.8672501621170026e-05, + "loss": 0.0948028564453125, + "step": 2440 + }, + { + "epoch": 0.16499932405029066, + "grad_norm": 1.3612966537475586, + "learning_rate": 2.8671144194193766e-05, + "loss": 0.21343994140625, + "step": 2441 + }, + { + "epoch": 0.16506691902122483, + "grad_norm": 0.5042200088500977, + "learning_rate": 2.8669786105722535e-05, + "loss": 0.09153366088867188, + "step": 2442 + }, + { + "epoch": 0.16513451399215898, + "grad_norm": 0.9271637201309204, + "learning_rate": 2.8668427355822036e-05, + "loss": 0.1063385009765625, + "step": 2443 + }, + { + "epoch": 0.16520210896309315, + "grad_norm": 0.42761731147766113, + "learning_rate": 2.8667067944558022e-05, + "loss": 0.06766510009765625, + "step": 2444 + }, + { + "epoch": 0.16526970393402732, + "grad_norm": 1.1319189071655273, + "learning_rate": 2.8665707871996263e-05, + "loss": 0.19730377197265625, + "step": 2445 + }, + { + "epoch": 0.16533729890496146, + "grad_norm": 0.7359156012535095, + "learning_rate": 2.866434713820257e-05, + "loss": 0.16423797607421875, + "step": 2446 + }, + { + "epoch": 0.16540489387589563, + "grad_norm": 0.2715439796447754, + "learning_rate": 2.8662985743242794e-05, + "loss": 0.053070068359375, + "step": 2447 + }, + { + "epoch": 0.1654724888468298, + "grad_norm": 0.5324820876121521, + "learning_rate": 2.8661623687182787e-05, + "loss": 0.10067367553710938, + "step": 2448 + }, + { + "epoch": 0.16554008381776397, + "grad_norm": 0.3593750298023224, + "learning_rate": 2.8660260970088475e-05, + "loss": 0.0587158203125, + "step": 2449 + }, + { + "epoch": 0.16560767878869811, + "grad_norm": 2.5211448669433594, + "learning_rate": 2.8658897592025777e-05, + "loss": 0.243133544921875, + "step": 2450 + }, + { + "epoch": 0.16567527375963229, + "grad_norm": 0.31062108278274536, + "learning_rate": 2.8657533553060674e-05, + "loss": 0.03697967529296875, + "step": 2451 + }, + { + "epoch": 0.16574286873056646, + "grad_norm": 0.33055955171585083, + "learning_rate": 2.8656168853259162e-05, + "loss": 0.05574798583984375, + "step": 2452 + }, + { + "epoch": 0.1658104637015006, + "grad_norm": 1.0760058164596558, + "learning_rate": 2.8654803492687275e-05, + "loss": 0.1518707275390625, + "step": 2453 + }, + { + "epoch": 0.16587805867243477, + "grad_norm": 0.3942853808403015, + "learning_rate": 2.865343747141107e-05, + "loss": 0.073883056640625, + "step": 2454 + }, + { + "epoch": 0.16594565364336894, + "grad_norm": 0.5722041130065918, + "learning_rate": 2.865207078949666e-05, + "loss": 0.0955810546875, + "step": 2455 + }, + { + "epoch": 0.16601324861430308, + "grad_norm": 0.45808061957359314, + "learning_rate": 2.8650703447010157e-05, + "loss": 0.06304168701171875, + "step": 2456 + }, + { + "epoch": 0.16608084358523725, + "grad_norm": 0.7609366178512573, + "learning_rate": 2.8649335444017733e-05, + "loss": 0.2423095703125, + "step": 2457 + }, + { + "epoch": 0.16614843855617142, + "grad_norm": 1.4430638551712036, + "learning_rate": 2.8647966780585575e-05, + "loss": 0.20513916015625, + "step": 2458 + }, + { + "epoch": 0.1662160335271056, + "grad_norm": 0.4559325873851776, + "learning_rate": 2.8646597456779908e-05, + "loss": 0.0737762451171875, + "step": 2459 + }, + { + "epoch": 0.16628362849803974, + "grad_norm": 0.6608869433403015, + "learning_rate": 2.8645227472666985e-05, + "loss": 0.13567352294921875, + "step": 2460 + }, + { + "epoch": 0.1663512234689739, + "grad_norm": 0.9980106353759766, + "learning_rate": 2.86438568283131e-05, + "loss": 0.1368255615234375, + "step": 2461 + }, + { + "epoch": 0.16641881843990808, + "grad_norm": 0.7661494016647339, + "learning_rate": 2.8642485523784573e-05, + "loss": 0.04030609130859375, + "step": 2462 + }, + { + "epoch": 0.16648641341084222, + "grad_norm": 0.4386613368988037, + "learning_rate": 2.8641113559147755e-05, + "loss": 0.0748291015625, + "step": 2463 + }, + { + "epoch": 0.1665540083817764, + "grad_norm": 0.32802218198776245, + "learning_rate": 2.863974093446903e-05, + "loss": 0.091796875, + "step": 2464 + }, + { + "epoch": 0.16662160335271056, + "grad_norm": 0.4232969284057617, + "learning_rate": 2.863836764981481e-05, + "loss": 0.0912628173828125, + "step": 2465 + }, + { + "epoch": 0.16668919832364473, + "grad_norm": 1.1210715770721436, + "learning_rate": 2.8636993705251553e-05, + "loss": 0.15643310546875, + "step": 2466 + }, + { + "epoch": 0.16675679329457888, + "grad_norm": 0.3943633437156677, + "learning_rate": 2.8635619100845726e-05, + "loss": 0.0572967529296875, + "step": 2467 + }, + { + "epoch": 0.16682438826551305, + "grad_norm": 1.0343650579452515, + "learning_rate": 2.8634243836663853e-05, + "loss": 0.11353302001953125, + "step": 2468 + }, + { + "epoch": 0.16689198323644722, + "grad_norm": 0.5520825982093811, + "learning_rate": 2.8632867912772473e-05, + "loss": 0.07817459106445312, + "step": 2469 + }, + { + "epoch": 0.16695957820738136, + "grad_norm": 1.2051600217819214, + "learning_rate": 2.863149132923816e-05, + "loss": 0.2141876220703125, + "step": 2470 + }, + { + "epoch": 0.16702717317831553, + "grad_norm": 0.4134657382965088, + "learning_rate": 2.8630114086127524e-05, + "loss": 0.05605316162109375, + "step": 2471 + }, + { + "epoch": 0.1670947681492497, + "grad_norm": 0.4562971293926239, + "learning_rate": 2.86287361835072e-05, + "loss": 0.07691192626953125, + "step": 2472 + }, + { + "epoch": 0.16716236312018387, + "grad_norm": 0.5691223740577698, + "learning_rate": 2.862735762144387e-05, + "loss": 0.07841873168945312, + "step": 2473 + }, + { + "epoch": 0.16722995809111801, + "grad_norm": 0.46931764483451843, + "learning_rate": 2.8625978400004228e-05, + "loss": 0.060516357421875, + "step": 2474 + }, + { + "epoch": 0.16729755306205218, + "grad_norm": 0.9856476783752441, + "learning_rate": 2.862459851925501e-05, + "loss": 0.1322021484375, + "step": 2475 + }, + { + "epoch": 0.16736514803298636, + "grad_norm": 0.3033187985420227, + "learning_rate": 2.8623217979262987e-05, + "loss": 0.060699462890625, + "step": 2476 + }, + { + "epoch": 0.1674327430039205, + "grad_norm": 0.6904470920562744, + "learning_rate": 2.8621836780094955e-05, + "loss": 0.14276123046875, + "step": 2477 + }, + { + "epoch": 0.16750033797485467, + "grad_norm": 0.536872148513794, + "learning_rate": 2.8620454921817743e-05, + "loss": 0.103729248046875, + "step": 2478 + }, + { + "epoch": 0.16756793294578884, + "grad_norm": 1.5019536018371582, + "learning_rate": 2.8619072404498224e-05, + "loss": 0.200439453125, + "step": 2479 + }, + { + "epoch": 0.167635527916723, + "grad_norm": 0.8155928254127502, + "learning_rate": 2.861768922820328e-05, + "loss": 0.1744384765625, + "step": 2480 + }, + { + "epoch": 0.16770312288765715, + "grad_norm": 0.5447115898132324, + "learning_rate": 2.8616305392999848e-05, + "loss": 0.07523345947265625, + "step": 2481 + }, + { + "epoch": 0.16777071785859132, + "grad_norm": 0.5354472398757935, + "learning_rate": 2.861492089895488e-05, + "loss": 0.09454345703125, + "step": 2482 + }, + { + "epoch": 0.1678383128295255, + "grad_norm": 1.0689126253128052, + "learning_rate": 2.8613535746135367e-05, + "loss": 0.14453125, + "step": 2483 + }, + { + "epoch": 0.16790590780045964, + "grad_norm": 0.3099651634693146, + "learning_rate": 2.8612149934608335e-05, + "loss": 0.05503082275390625, + "step": 2484 + }, + { + "epoch": 0.1679735027713938, + "grad_norm": 0.8131712675094604, + "learning_rate": 2.8610763464440836e-05, + "loss": 0.202728271484375, + "step": 2485 + }, + { + "epoch": 0.16804109774232798, + "grad_norm": 0.18428127467632294, + "learning_rate": 2.860937633569996e-05, + "loss": 0.028783798217773438, + "step": 2486 + }, + { + "epoch": 0.16810869271326212, + "grad_norm": 0.7820386290550232, + "learning_rate": 2.860798854845282e-05, + "loss": 0.1636505126953125, + "step": 2487 + }, + { + "epoch": 0.1681762876841963, + "grad_norm": 0.6334187984466553, + "learning_rate": 2.8606600102766562e-05, + "loss": 0.0771484375, + "step": 2488 + }, + { + "epoch": 0.16824388265513046, + "grad_norm": 0.8874925374984741, + "learning_rate": 2.860521099870837e-05, + "loss": 0.2059326171875, + "step": 2489 + }, + { + "epoch": 0.16831147762606463, + "grad_norm": 0.9636916518211365, + "learning_rate": 2.8603821236345465e-05, + "loss": 0.090789794921875, + "step": 2490 + }, + { + "epoch": 0.16837907259699877, + "grad_norm": 1.360790729522705, + "learning_rate": 2.8602430815745086e-05, + "loss": 0.201141357421875, + "step": 2491 + }, + { + "epoch": 0.16844666756793295, + "grad_norm": 1.0654441118240356, + "learning_rate": 2.8601039736974517e-05, + "loss": 0.17208099365234375, + "step": 2492 + }, + { + "epoch": 0.16851426253886712, + "grad_norm": 0.8706581592559814, + "learning_rate": 2.8599648000101052e-05, + "loss": 0.07006454467773438, + "step": 2493 + }, + { + "epoch": 0.16858185750980126, + "grad_norm": 2.8643901348114014, + "learning_rate": 2.8598255605192048e-05, + "loss": 0.18537139892578125, + "step": 2494 + }, + { + "epoch": 0.16864945248073543, + "grad_norm": 0.2267330288887024, + "learning_rate": 2.859686255231487e-05, + "loss": 0.033416748046875, + "step": 2495 + }, + { + "epoch": 0.1687170474516696, + "grad_norm": 0.22864730656147003, + "learning_rate": 2.8595468841536923e-05, + "loss": 0.0478515625, + "step": 2496 + }, + { + "epoch": 0.16878464242260377, + "grad_norm": 0.7869551777839661, + "learning_rate": 2.8594074472925647e-05, + "loss": 0.13946533203125, + "step": 2497 + }, + { + "epoch": 0.1688522373935379, + "grad_norm": 0.43115323781967163, + "learning_rate": 2.8592679446548506e-05, + "loss": 0.073455810546875, + "step": 2498 + }, + { + "epoch": 0.16891983236447208, + "grad_norm": 0.5006123781204224, + "learning_rate": 2.8591283762473e-05, + "loss": 0.11480712890625, + "step": 2499 + }, + { + "epoch": 0.16898742733540625, + "grad_norm": 0.47007668018341064, + "learning_rate": 2.858988742076666e-05, + "loss": 0.0552215576171875, + "step": 2500 + }, + { + "epoch": 0.1690550223063404, + "grad_norm": 0.3955107033252716, + "learning_rate": 2.8588490421497054e-05, + "loss": 0.0550689697265625, + "step": 2501 + }, + { + "epoch": 0.16912261727727457, + "grad_norm": 0.6172095537185669, + "learning_rate": 2.8587092764731778e-05, + "loss": 0.08984375, + "step": 2502 + }, + { + "epoch": 0.16919021224820874, + "grad_norm": 1.503523826599121, + "learning_rate": 2.8585694450538458e-05, + "loss": 0.17742919921875, + "step": 2503 + }, + { + "epoch": 0.1692578072191429, + "grad_norm": 1.1786458492279053, + "learning_rate": 2.8584295478984748e-05, + "loss": 0.13844680786132812, + "step": 2504 + }, + { + "epoch": 0.16932540219007705, + "grad_norm": 1.3830612897872925, + "learning_rate": 2.8582895850138343e-05, + "loss": 0.20391845703125, + "step": 2505 + }, + { + "epoch": 0.16939299716101122, + "grad_norm": 1.0851542949676514, + "learning_rate": 2.8581495564066965e-05, + "loss": 0.1577606201171875, + "step": 2506 + }, + { + "epoch": 0.1694605921319454, + "grad_norm": 0.8235257863998413, + "learning_rate": 2.8580094620838373e-05, + "loss": 0.12042236328125, + "step": 2507 + }, + { + "epoch": 0.16952818710287954, + "grad_norm": 0.603084146976471, + "learning_rate": 2.8578693020520348e-05, + "loss": 0.07198715209960938, + "step": 2508 + }, + { + "epoch": 0.1695957820738137, + "grad_norm": 1.3267868757247925, + "learning_rate": 2.8577290763180708e-05, + "loss": 0.15038681030273438, + "step": 2509 + }, + { + "epoch": 0.16966337704474788, + "grad_norm": 1.389775276184082, + "learning_rate": 2.8575887848887305e-05, + "loss": 0.13178253173828125, + "step": 2510 + }, + { + "epoch": 0.16973097201568205, + "grad_norm": 0.8152663111686707, + "learning_rate": 2.857448427770802e-05, + "loss": 0.0913238525390625, + "step": 2511 + }, + { + "epoch": 0.1697985669866162, + "grad_norm": 2.776543140411377, + "learning_rate": 2.857308004971077e-05, + "loss": 0.203460693359375, + "step": 2512 + }, + { + "epoch": 0.16986616195755036, + "grad_norm": 0.7038245797157288, + "learning_rate": 2.8571675164963493e-05, + "loss": 0.09983062744140625, + "step": 2513 + }, + { + "epoch": 0.16993375692848453, + "grad_norm": 0.7551547884941101, + "learning_rate": 2.857026962353417e-05, + "loss": 0.145172119140625, + "step": 2514 + }, + { + "epoch": 0.17000135189941867, + "grad_norm": 0.296983003616333, + "learning_rate": 2.8568863425490815e-05, + "loss": 0.0496368408203125, + "step": 2515 + }, + { + "epoch": 0.17006894687035284, + "grad_norm": 0.48229625821113586, + "learning_rate": 2.8567456570901457e-05, + "loss": 0.05880546569824219, + "step": 2516 + }, + { + "epoch": 0.17013654184128701, + "grad_norm": 0.2478274554014206, + "learning_rate": 2.8566049059834182e-05, + "loss": 0.044586181640625, + "step": 2517 + }, + { + "epoch": 0.17020413681222116, + "grad_norm": 0.7991356253623962, + "learning_rate": 2.8564640892357084e-05, + "loss": 0.11437225341796875, + "step": 2518 + }, + { + "epoch": 0.17027173178315533, + "grad_norm": 0.745997965335846, + "learning_rate": 2.85632320685383e-05, + "loss": 0.08458709716796875, + "step": 2519 + }, + { + "epoch": 0.1703393267540895, + "grad_norm": 0.5809643864631653, + "learning_rate": 2.8561822588446e-05, + "loss": 0.07720184326171875, + "step": 2520 + }, + { + "epoch": 0.17040692172502367, + "grad_norm": 0.24895048141479492, + "learning_rate": 2.8560412452148387e-05, + "loss": 0.028682708740234375, + "step": 2521 + }, + { + "epoch": 0.1704745166959578, + "grad_norm": 0.9469398856163025, + "learning_rate": 2.8559001659713686e-05, + "loss": 0.231170654296875, + "step": 2522 + }, + { + "epoch": 0.17054211166689198, + "grad_norm": 0.3864627182483673, + "learning_rate": 2.8557590211210157e-05, + "loss": 0.06610107421875, + "step": 2523 + }, + { + "epoch": 0.17060970663782615, + "grad_norm": 2.4094176292419434, + "learning_rate": 2.855617810670611e-05, + "loss": 0.2293701171875, + "step": 2524 + }, + { + "epoch": 0.1706773016087603, + "grad_norm": 0.2934224009513855, + "learning_rate": 2.8554765346269855e-05, + "loss": 0.042083740234375, + "step": 2525 + }, + { + "epoch": 0.17074489657969447, + "grad_norm": 0.4769308567047119, + "learning_rate": 2.8553351929969758e-05, + "loss": 0.11236572265625, + "step": 2526 + }, + { + "epoch": 0.17081249155062864, + "grad_norm": 0.9202542901039124, + "learning_rate": 2.8551937857874205e-05, + "loss": 0.13360595703125, + "step": 2527 + }, + { + "epoch": 0.1708800865215628, + "grad_norm": 0.21119475364685059, + "learning_rate": 2.8550523130051623e-05, + "loss": 0.04283905029296875, + "step": 2528 + }, + { + "epoch": 0.17094768149249695, + "grad_norm": 1.7251981496810913, + "learning_rate": 2.854910774657046e-05, + "loss": 0.2611083984375, + "step": 2529 + }, + { + "epoch": 0.17101527646343112, + "grad_norm": 1.3810900449752808, + "learning_rate": 2.8547691707499204e-05, + "loss": 0.13802146911621094, + "step": 2530 + }, + { + "epoch": 0.1710828714343653, + "grad_norm": 0.4679346978664398, + "learning_rate": 2.854627501290637e-05, + "loss": 0.07018280029296875, + "step": 2531 + }, + { + "epoch": 0.17115046640529943, + "grad_norm": 0.5735490322113037, + "learning_rate": 2.854485766286051e-05, + "loss": 0.1484375, + "step": 2532 + }, + { + "epoch": 0.1712180613762336, + "grad_norm": 0.6944669485092163, + "learning_rate": 2.8543439657430197e-05, + "loss": 0.1181793212890625, + "step": 2533 + }, + { + "epoch": 0.17128565634716778, + "grad_norm": 0.9935725927352905, + "learning_rate": 2.8542020996684052e-05, + "loss": 0.10566329956054688, + "step": 2534 + }, + { + "epoch": 0.17135325131810195, + "grad_norm": 0.3180501163005829, + "learning_rate": 2.854060168069071e-05, + "loss": 0.0739898681640625, + "step": 2535 + }, + { + "epoch": 0.1714208462890361, + "grad_norm": 0.7112829089164734, + "learning_rate": 2.8539181709518857e-05, + "loss": 0.1194305419921875, + "step": 2536 + }, + { + "epoch": 0.17148844125997026, + "grad_norm": 0.8592662215232849, + "learning_rate": 2.853776108323719e-05, + "loss": 0.18682861328125, + "step": 2537 + }, + { + "epoch": 0.17155603623090443, + "grad_norm": 0.6916146278381348, + "learning_rate": 2.8536339801914453e-05, + "loss": 0.1259918212890625, + "step": 2538 + }, + { + "epoch": 0.17162363120183857, + "grad_norm": 0.24895301461219788, + "learning_rate": 2.853491786561941e-05, + "loss": 0.029218673706054688, + "step": 2539 + }, + { + "epoch": 0.17169122617277274, + "grad_norm": 0.22266988456249237, + "learning_rate": 2.853349527442087e-05, + "loss": 0.03827667236328125, + "step": 2540 + }, + { + "epoch": 0.1717588211437069, + "grad_norm": 0.9803117513656616, + "learning_rate": 2.853207202838767e-05, + "loss": 0.1367950439453125, + "step": 2541 + }, + { + "epoch": 0.17182641611464108, + "grad_norm": 1.0252889394760132, + "learning_rate": 2.8530648127588667e-05, + "loss": 0.1746978759765625, + "step": 2542 + }, + { + "epoch": 0.17189401108557523, + "grad_norm": 0.5702139139175415, + "learning_rate": 2.852922357209276e-05, + "loss": 0.12969970703125, + "step": 2543 + }, + { + "epoch": 0.1719616060565094, + "grad_norm": 0.37439775466918945, + "learning_rate": 2.8527798361968877e-05, + "loss": 0.0717926025390625, + "step": 2544 + }, + { + "epoch": 0.17202920102744357, + "grad_norm": 0.7282090783119202, + "learning_rate": 2.8526372497285988e-05, + "loss": 0.1014251708984375, + "step": 2545 + }, + { + "epoch": 0.1720967959983777, + "grad_norm": 0.5196105241775513, + "learning_rate": 2.8524945978113073e-05, + "loss": 0.07878875732421875, + "step": 2546 + }, + { + "epoch": 0.17216439096931188, + "grad_norm": 0.9088237285614014, + "learning_rate": 2.852351880451916e-05, + "loss": 0.12311553955078125, + "step": 2547 + }, + { + "epoch": 0.17223198594024605, + "grad_norm": 0.5108739733695984, + "learning_rate": 2.8522090976573306e-05, + "loss": 0.09401702880859375, + "step": 2548 + }, + { + "epoch": 0.1722995809111802, + "grad_norm": 1.1784069538116455, + "learning_rate": 2.8520662494344602e-05, + "loss": 0.15097427368164062, + "step": 2549 + }, + { + "epoch": 0.17236717588211437, + "grad_norm": 0.5017846822738647, + "learning_rate": 2.8519233357902157e-05, + "loss": 0.0772705078125, + "step": 2550 + }, + { + "epoch": 0.17243477085304854, + "grad_norm": 0.3154131770133972, + "learning_rate": 2.8517803567315127e-05, + "loss": 0.04257965087890625, + "step": 2551 + }, + { + "epoch": 0.1725023658239827, + "grad_norm": 0.49968209862709045, + "learning_rate": 2.8516373122652695e-05, + "loss": 0.0627288818359375, + "step": 2552 + }, + { + "epoch": 0.17256996079491685, + "grad_norm": 1.2954157590866089, + "learning_rate": 2.8514942023984075e-05, + "loss": 0.1246185302734375, + "step": 2553 + }, + { + "epoch": 0.17263755576585102, + "grad_norm": 1.012951135635376, + "learning_rate": 2.851351027137851e-05, + "loss": 0.16558837890625, + "step": 2554 + }, + { + "epoch": 0.1727051507367852, + "grad_norm": 0.6782439947128296, + "learning_rate": 2.8512077864905283e-05, + "loss": 0.09902572631835938, + "step": 2555 + }, + { + "epoch": 0.17277274570771933, + "grad_norm": 0.2318519800901413, + "learning_rate": 2.851064480463369e-05, + "loss": 0.019041061401367188, + "step": 2556 + }, + { + "epoch": 0.1728403406786535, + "grad_norm": 0.39530056715011597, + "learning_rate": 2.8509211090633086e-05, + "loss": 0.06223297119140625, + "step": 2557 + }, + { + "epoch": 0.17290793564958767, + "grad_norm": 1.3672274351119995, + "learning_rate": 2.850777672297284e-05, + "loss": 0.1966552734375, + "step": 2558 + }, + { + "epoch": 0.17297553062052184, + "grad_norm": 0.5544204115867615, + "learning_rate": 2.8506341701722342e-05, + "loss": 0.0864105224609375, + "step": 2559 + }, + { + "epoch": 0.173043125591456, + "grad_norm": 0.5750395059585571, + "learning_rate": 2.8504906026951042e-05, + "loss": 0.08589935302734375, + "step": 2560 + }, + { + "epoch": 0.17311072056239016, + "grad_norm": 0.7245415449142456, + "learning_rate": 2.8503469698728403e-05, + "loss": 0.13013076782226562, + "step": 2561 + }, + { + "epoch": 0.17317831553332433, + "grad_norm": 0.7252517342567444, + "learning_rate": 2.850203271712392e-05, + "loss": 0.16416168212890625, + "step": 2562 + }, + { + "epoch": 0.17324591050425847, + "grad_norm": 0.41234728693962097, + "learning_rate": 2.8500595082207132e-05, + "loss": 0.092864990234375, + "step": 2563 + }, + { + "epoch": 0.17331350547519264, + "grad_norm": 1.6195709705352783, + "learning_rate": 2.849915679404759e-05, + "loss": 0.2547607421875, + "step": 2564 + }, + { + "epoch": 0.1733811004461268, + "grad_norm": 1.1337757110595703, + "learning_rate": 2.8497717852714895e-05, + "loss": 0.1564178466796875, + "step": 2565 + }, + { + "epoch": 0.17344869541706098, + "grad_norm": 1.0300382375717163, + "learning_rate": 2.8496278258278665e-05, + "loss": 0.155792236328125, + "step": 2566 + }, + { + "epoch": 0.17351629038799513, + "grad_norm": 0.8522441983222961, + "learning_rate": 2.849483801080856e-05, + "loss": 0.1660003662109375, + "step": 2567 + }, + { + "epoch": 0.1735838853589293, + "grad_norm": 1.6326676607131958, + "learning_rate": 2.8493397110374274e-05, + "loss": 0.2705078125, + "step": 2568 + }, + { + "epoch": 0.17365148032986347, + "grad_norm": 0.7689064145088196, + "learning_rate": 2.8491955557045517e-05, + "loss": 0.10198974609375, + "step": 2569 + }, + { + "epoch": 0.1737190753007976, + "grad_norm": 1.3378413915634155, + "learning_rate": 2.8490513350892045e-05, + "loss": 0.201690673828125, + "step": 2570 + }, + { + "epoch": 0.17378667027173178, + "grad_norm": 2.705375909805298, + "learning_rate": 2.8489070491983644e-05, + "loss": 0.249237060546875, + "step": 2571 + }, + { + "epoch": 0.17385426524266595, + "grad_norm": 1.1188201904296875, + "learning_rate": 2.848762698039012e-05, + "loss": 0.2022857666015625, + "step": 2572 + }, + { + "epoch": 0.17392186021360012, + "grad_norm": 0.4312098026275635, + "learning_rate": 2.8486182816181326e-05, + "loss": 0.1138916015625, + "step": 2573 + }, + { + "epoch": 0.17398945518453426, + "grad_norm": 0.9967972636222839, + "learning_rate": 2.8484737999427137e-05, + "loss": 0.1354217529296875, + "step": 2574 + }, + { + "epoch": 0.17405705015546843, + "grad_norm": 0.3387812674045563, + "learning_rate": 2.8483292530197468e-05, + "loss": 0.06412506103515625, + "step": 2575 + }, + { + "epoch": 0.1741246451264026, + "grad_norm": 0.5444138050079346, + "learning_rate": 2.8481846408562247e-05, + "loss": 0.04586219787597656, + "step": 2576 + }, + { + "epoch": 0.17419224009733675, + "grad_norm": 0.3461367189884186, + "learning_rate": 2.848039963459146e-05, + "loss": 0.06282424926757812, + "step": 2577 + }, + { + "epoch": 0.17425983506827092, + "grad_norm": 0.3384467363357544, + "learning_rate": 2.84789522083551e-05, + "loss": 0.0623626708984375, + "step": 2578 + }, + { + "epoch": 0.1743274300392051, + "grad_norm": 1.4103437662124634, + "learning_rate": 2.8477504129923213e-05, + "loss": 0.225006103515625, + "step": 2579 + }, + { + "epoch": 0.17439502501013923, + "grad_norm": 1.111169695854187, + "learning_rate": 2.8476055399365857e-05, + "loss": 0.16375732421875, + "step": 2580 + }, + { + "epoch": 0.1744626199810734, + "grad_norm": 1.0982931852340698, + "learning_rate": 2.8474606016753136e-05, + "loss": 0.15560150146484375, + "step": 2581 + }, + { + "epoch": 0.17453021495200757, + "grad_norm": 0.5759720802307129, + "learning_rate": 2.8473155982155174e-05, + "loss": 0.1270294189453125, + "step": 2582 + }, + { + "epoch": 0.17459780992294174, + "grad_norm": 0.6763030290603638, + "learning_rate": 2.847170529564214e-05, + "loss": 0.1197052001953125, + "step": 2583 + }, + { + "epoch": 0.1746654048938759, + "grad_norm": 0.5889250636100769, + "learning_rate": 2.8470253957284225e-05, + "loss": 0.139068603515625, + "step": 2584 + }, + { + "epoch": 0.17473299986481006, + "grad_norm": 0.24472413957118988, + "learning_rate": 2.846880196715165e-05, + "loss": 0.03408050537109375, + "step": 2585 + }, + { + "epoch": 0.17480059483574423, + "grad_norm": 2.3852765560150146, + "learning_rate": 2.8467349325314677e-05, + "loss": 0.28277587890625, + "step": 2586 + }, + { + "epoch": 0.17486818980667837, + "grad_norm": 0.7167336344718933, + "learning_rate": 2.8465896031843587e-05, + "loss": 0.1457061767578125, + "step": 2587 + }, + { + "epoch": 0.17493578477761254, + "grad_norm": 1.0576192140579224, + "learning_rate": 2.8464442086808707e-05, + "loss": 0.228240966796875, + "step": 2588 + }, + { + "epoch": 0.1750033797485467, + "grad_norm": 0.19282585382461548, + "learning_rate": 2.8462987490280383e-05, + "loss": 0.026529312133789062, + "step": 2589 + }, + { + "epoch": 0.17507097471948088, + "grad_norm": 1.572889804840088, + "learning_rate": 2.8461532242329e-05, + "loss": 0.264556884765625, + "step": 2590 + }, + { + "epoch": 0.17513856969041502, + "grad_norm": 0.23661473393440247, + "learning_rate": 2.8460076343024972e-05, + "loss": 0.053802490234375, + "step": 2591 + }, + { + "epoch": 0.1752061646613492, + "grad_norm": 1.2712421417236328, + "learning_rate": 2.8458619792438744e-05, + "loss": 0.1504364013671875, + "step": 2592 + }, + { + "epoch": 0.17527375963228337, + "grad_norm": 0.7017257809638977, + "learning_rate": 2.845716259064079e-05, + "loss": 0.11431121826171875, + "step": 2593 + }, + { + "epoch": 0.1753413546032175, + "grad_norm": 1.15315580368042, + "learning_rate": 2.845570473770162e-05, + "loss": 0.23980712890625, + "step": 2594 + }, + { + "epoch": 0.17540894957415168, + "grad_norm": 0.4988826811313629, + "learning_rate": 2.8454246233691774e-05, + "loss": 0.0944671630859375, + "step": 2595 + }, + { + "epoch": 0.17547654454508585, + "grad_norm": 0.49966734647750854, + "learning_rate": 2.8452787078681823e-05, + "loss": 0.09105682373046875, + "step": 2596 + }, + { + "epoch": 0.17554413951602002, + "grad_norm": 0.4231088161468506, + "learning_rate": 2.8451327272742377e-05, + "loss": 0.06652641296386719, + "step": 2597 + }, + { + "epoch": 0.17561173448695416, + "grad_norm": 0.3994210660457611, + "learning_rate": 2.844986681594406e-05, + "loss": 0.089691162109375, + "step": 2598 + }, + { + "epoch": 0.17567932945788833, + "grad_norm": 0.6440356969833374, + "learning_rate": 2.8448405708357544e-05, + "loss": 0.10517120361328125, + "step": 2599 + }, + { + "epoch": 0.1757469244288225, + "grad_norm": 1.0671937465667725, + "learning_rate": 2.8446943950053524e-05, + "loss": 0.1286468505859375, + "step": 2600 + }, + { + "epoch": 0.17581451939975665, + "grad_norm": 0.8168448209762573, + "learning_rate": 2.844548154110273e-05, + "loss": 0.0980072021484375, + "step": 2601 + }, + { + "epoch": 0.17588211437069082, + "grad_norm": 0.7170403003692627, + "learning_rate": 2.8444018481575923e-05, + "loss": 0.1331787109375, + "step": 2602 + }, + { + "epoch": 0.175949709341625, + "grad_norm": 0.3724195659160614, + "learning_rate": 2.8442554771543896e-05, + "loss": 0.07040786743164062, + "step": 2603 + }, + { + "epoch": 0.17601730431255916, + "grad_norm": 0.926060140132904, + "learning_rate": 2.844109041107747e-05, + "loss": 0.1504058837890625, + "step": 2604 + }, + { + "epoch": 0.1760848992834933, + "grad_norm": 0.6477001905441284, + "learning_rate": 2.8439625400247502e-05, + "loss": 0.1256561279296875, + "step": 2605 + }, + { + "epoch": 0.17615249425442747, + "grad_norm": 0.3024241030216217, + "learning_rate": 2.8438159739124873e-05, + "loss": 0.05217742919921875, + "step": 2606 + }, + { + "epoch": 0.17622008922536164, + "grad_norm": 0.6380831599235535, + "learning_rate": 2.8436693427780508e-05, + "loss": 0.1217803955078125, + "step": 2607 + }, + { + "epoch": 0.17628768419629579, + "grad_norm": 0.7354841828346252, + "learning_rate": 2.8435226466285352e-05, + "loss": 0.098297119140625, + "step": 2608 + }, + { + "epoch": 0.17635527916722996, + "grad_norm": 0.46378952264785767, + "learning_rate": 2.8433758854710387e-05, + "loss": 0.076080322265625, + "step": 2609 + }, + { + "epoch": 0.17642287413816413, + "grad_norm": 0.6763085126876831, + "learning_rate": 2.8432290593126627e-05, + "loss": 0.129180908203125, + "step": 2610 + }, + { + "epoch": 0.1764904691090983, + "grad_norm": 0.36698558926582336, + "learning_rate": 2.8430821681605112e-05, + "loss": 0.06821441650390625, + "step": 2611 + }, + { + "epoch": 0.17655806408003244, + "grad_norm": 0.4217177629470825, + "learning_rate": 2.8429352120216914e-05, + "loss": 0.072113037109375, + "step": 2612 + }, + { + "epoch": 0.1766256590509666, + "grad_norm": 1.1464917659759521, + "learning_rate": 2.8427881909033147e-05, + "loss": 0.1634979248046875, + "step": 2613 + }, + { + "epoch": 0.17669325402190078, + "grad_norm": 0.659598171710968, + "learning_rate": 2.8426411048124952e-05, + "loss": 0.149932861328125, + "step": 2614 + }, + { + "epoch": 0.17676084899283492, + "grad_norm": 1.1933331489562988, + "learning_rate": 2.8424939537563483e-05, + "loss": 0.14019775390625, + "step": 2615 + }, + { + "epoch": 0.1768284439637691, + "grad_norm": 0.5787535905838013, + "learning_rate": 2.842346737741996e-05, + "loss": 0.096527099609375, + "step": 2616 + }, + { + "epoch": 0.17689603893470326, + "grad_norm": 0.46400120854377747, + "learning_rate": 2.84219945677656e-05, + "loss": 0.0949249267578125, + "step": 2617 + }, + { + "epoch": 0.1769636339056374, + "grad_norm": 0.35380008816719055, + "learning_rate": 2.842052110867167e-05, + "loss": 0.058746337890625, + "step": 2618 + }, + { + "epoch": 0.17703122887657158, + "grad_norm": 0.523481011390686, + "learning_rate": 2.8419047000209473e-05, + "loss": 0.08441925048828125, + "step": 2619 + }, + { + "epoch": 0.17709882384750575, + "grad_norm": 1.195439100265503, + "learning_rate": 2.8417572242450327e-05, + "loss": 0.1907196044921875, + "step": 2620 + }, + { + "epoch": 0.17716641881843992, + "grad_norm": 0.6112141609191895, + "learning_rate": 2.8416096835465594e-05, + "loss": 0.10929107666015625, + "step": 2621 + }, + { + "epoch": 0.17723401378937406, + "grad_norm": 1.194864273071289, + "learning_rate": 2.841462077932666e-05, + "loss": 0.1980438232421875, + "step": 2622 + }, + { + "epoch": 0.17730160876030823, + "grad_norm": 0.43192344903945923, + "learning_rate": 2.8413144074104952e-05, + "loss": 0.06439208984375, + "step": 2623 + }, + { + "epoch": 0.1773692037312424, + "grad_norm": 0.9106289744377136, + "learning_rate": 2.8411666719871917e-05, + "loss": 0.164276123046875, + "step": 2624 + }, + { + "epoch": 0.17743679870217655, + "grad_norm": 0.5607917904853821, + "learning_rate": 2.8410188716699037e-05, + "loss": 0.0865631103515625, + "step": 2625 + }, + { + "epoch": 0.17750439367311072, + "grad_norm": 0.4410433769226074, + "learning_rate": 2.8408710064657835e-05, + "loss": 0.108367919921875, + "step": 2626 + }, + { + "epoch": 0.1775719886440449, + "grad_norm": 0.6721046566963196, + "learning_rate": 2.8407230763819848e-05, + "loss": 0.11316680908203125, + "step": 2627 + }, + { + "epoch": 0.17763958361497906, + "grad_norm": 0.707149863243103, + "learning_rate": 2.8405750814256657e-05, + "loss": 0.1116943359375, + "step": 2628 + }, + { + "epoch": 0.1777071785859132, + "grad_norm": 1.0921528339385986, + "learning_rate": 2.8404270216039873e-05, + "loss": 0.1968536376953125, + "step": 2629 + }, + { + "epoch": 0.17777477355684737, + "grad_norm": 0.7578393816947937, + "learning_rate": 2.840278896924114e-05, + "loss": 0.14651870727539062, + "step": 2630 + }, + { + "epoch": 0.17784236852778154, + "grad_norm": 0.3428944945335388, + "learning_rate": 2.8401307073932123e-05, + "loss": 0.0629730224609375, + "step": 2631 + }, + { + "epoch": 0.17790996349871568, + "grad_norm": 1.6026618480682373, + "learning_rate": 2.8399824530184526e-05, + "loss": 0.2101898193359375, + "step": 2632 + }, + { + "epoch": 0.17797755846964985, + "grad_norm": 0.5062590837478638, + "learning_rate": 2.8398341338070085e-05, + "loss": 0.0839080810546875, + "step": 2633 + }, + { + "epoch": 0.17804515344058403, + "grad_norm": 2.6639838218688965, + "learning_rate": 2.8396857497660572e-05, + "loss": 0.278839111328125, + "step": 2634 + }, + { + "epoch": 0.1781127484115182, + "grad_norm": 0.28001368045806885, + "learning_rate": 2.8395373009027777e-05, + "loss": 0.054195404052734375, + "step": 2635 + }, + { + "epoch": 0.17818034338245234, + "grad_norm": 0.9827078580856323, + "learning_rate": 2.8393887872243528e-05, + "loss": 0.199951171875, + "step": 2636 + }, + { + "epoch": 0.1782479383533865, + "grad_norm": 0.6166521906852722, + "learning_rate": 2.839240208737969e-05, + "loss": 0.143280029296875, + "step": 2637 + }, + { + "epoch": 0.17831553332432068, + "grad_norm": 0.4910529553890228, + "learning_rate": 2.8390915654508153e-05, + "loss": 0.05150604248046875, + "step": 2638 + }, + { + "epoch": 0.17838312829525482, + "grad_norm": 0.2578500211238861, + "learning_rate": 2.838942857370084e-05, + "loss": 0.031803131103515625, + "step": 2639 + }, + { + "epoch": 0.178450723266189, + "grad_norm": 0.8010869026184082, + "learning_rate": 2.8387940845029703e-05, + "loss": 0.11525726318359375, + "step": 2640 + }, + { + "epoch": 0.17851831823712316, + "grad_norm": 0.57256680727005, + "learning_rate": 2.8386452468566726e-05, + "loss": 0.07747650146484375, + "step": 2641 + }, + { + "epoch": 0.17858591320805733, + "grad_norm": 0.2863466739654541, + "learning_rate": 2.8384963444383936e-05, + "loss": 0.059356689453125, + "step": 2642 + }, + { + "epoch": 0.17865350817899148, + "grad_norm": 0.2848878502845764, + "learning_rate": 2.8383473772553367e-05, + "loss": 0.04778289794921875, + "step": 2643 + }, + { + "epoch": 0.17872110314992565, + "grad_norm": 0.3277261257171631, + "learning_rate": 2.838198345314711e-05, + "loss": 0.0775604248046875, + "step": 2644 + }, + { + "epoch": 0.17878869812085982, + "grad_norm": 0.3272480368614197, + "learning_rate": 2.8380492486237276e-05, + "loss": 0.05400848388671875, + "step": 2645 + }, + { + "epoch": 0.17885629309179396, + "grad_norm": 0.2591632902622223, + "learning_rate": 2.8379000871895997e-05, + "loss": 0.049762725830078125, + "step": 2646 + }, + { + "epoch": 0.17892388806272813, + "grad_norm": 1.302453637123108, + "learning_rate": 2.8377508610195453e-05, + "loss": 0.1811981201171875, + "step": 2647 + }, + { + "epoch": 0.1789914830336623, + "grad_norm": 0.6309241056442261, + "learning_rate": 2.8376015701207855e-05, + "loss": 0.123382568359375, + "step": 2648 + }, + { + "epoch": 0.17905907800459644, + "grad_norm": 0.40369048714637756, + "learning_rate": 2.8374522145005425e-05, + "loss": 0.07410430908203125, + "step": 2649 + }, + { + "epoch": 0.17912667297553062, + "grad_norm": 0.367654412984848, + "learning_rate": 2.837302794166044e-05, + "loss": 0.043453216552734375, + "step": 2650 + }, + { + "epoch": 0.17919426794646479, + "grad_norm": 0.394548237323761, + "learning_rate": 2.8371533091245203e-05, + "loss": 0.075927734375, + "step": 2651 + }, + { + "epoch": 0.17926186291739896, + "grad_norm": 0.6938762068748474, + "learning_rate": 2.8370037593832033e-05, + "loss": 0.10087966918945312, + "step": 2652 + }, + { + "epoch": 0.1793294578883331, + "grad_norm": 0.9393921494483948, + "learning_rate": 2.8368541449493294e-05, + "loss": 0.17669677734375, + "step": 2653 + }, + { + "epoch": 0.17939705285926727, + "grad_norm": 0.731675386428833, + "learning_rate": 2.8367044658301388e-05, + "loss": 0.10785675048828125, + "step": 2654 + }, + { + "epoch": 0.17946464783020144, + "grad_norm": 1.1607789993286133, + "learning_rate": 2.8365547220328733e-05, + "loss": 0.1743621826171875, + "step": 2655 + }, + { + "epoch": 0.17953224280113558, + "grad_norm": 0.8046778440475464, + "learning_rate": 2.836404913564778e-05, + "loss": 0.143402099609375, + "step": 2656 + }, + { + "epoch": 0.17959983777206975, + "grad_norm": 0.4460134506225586, + "learning_rate": 2.836255040433102e-05, + "loss": 0.0518798828125, + "step": 2657 + }, + { + "epoch": 0.17966743274300392, + "grad_norm": 0.8369786143302917, + "learning_rate": 2.8361051026450973e-05, + "loss": 0.098480224609375, + "step": 2658 + }, + { + "epoch": 0.1797350277139381, + "grad_norm": 0.5481520295143127, + "learning_rate": 2.8359551002080185e-05, + "loss": 0.0505218505859375, + "step": 2659 + }, + { + "epoch": 0.17980262268487224, + "grad_norm": 1.2135237455368042, + "learning_rate": 2.8358050331291237e-05, + "loss": 0.145904541015625, + "step": 2660 + }, + { + "epoch": 0.1798702176558064, + "grad_norm": 0.40734127163887024, + "learning_rate": 2.835654901415674e-05, + "loss": 0.08202362060546875, + "step": 2661 + }, + { + "epoch": 0.17993781262674058, + "grad_norm": 1.5781525373458862, + "learning_rate": 2.835504705074934e-05, + "loss": 0.219573974609375, + "step": 2662 + }, + { + "epoch": 0.18000540759767472, + "grad_norm": 1.6900959014892578, + "learning_rate": 2.8353544441141707e-05, + "loss": 0.2012786865234375, + "step": 2663 + }, + { + "epoch": 0.1800730025686089, + "grad_norm": 1.167487382888794, + "learning_rate": 2.835204118540655e-05, + "loss": 0.1654052734375, + "step": 2664 + }, + { + "epoch": 0.18014059753954306, + "grad_norm": 0.5698453187942505, + "learning_rate": 2.8350537283616606e-05, + "loss": 0.07109832763671875, + "step": 2665 + }, + { + "epoch": 0.18020819251047723, + "grad_norm": 0.5061149597167969, + "learning_rate": 2.8349032735844643e-05, + "loss": 0.0646209716796875, + "step": 2666 + }, + { + "epoch": 0.18027578748141138, + "grad_norm": 0.5268478989601135, + "learning_rate": 2.8347527542163455e-05, + "loss": 0.06047248840332031, + "step": 2667 + }, + { + "epoch": 0.18034338245234555, + "grad_norm": 0.4555983543395996, + "learning_rate": 2.8346021702645885e-05, + "loss": 0.06185150146484375, + "step": 2668 + }, + { + "epoch": 0.18041097742327972, + "grad_norm": 2.0814390182495117, + "learning_rate": 2.8344515217364782e-05, + "loss": 0.1537322998046875, + "step": 2669 + }, + { + "epoch": 0.18047857239421386, + "grad_norm": 1.480249047279358, + "learning_rate": 2.8343008086393046e-05, + "loss": 0.217864990234375, + "step": 2670 + }, + { + "epoch": 0.18054616736514803, + "grad_norm": 1.307884693145752, + "learning_rate": 2.8341500309803598e-05, + "loss": 0.13681793212890625, + "step": 2671 + }, + { + "epoch": 0.1806137623360822, + "grad_norm": 0.7499336004257202, + "learning_rate": 2.8339991887669392e-05, + "loss": 0.12655258178710938, + "step": 2672 + }, + { + "epoch": 0.18068135730701637, + "grad_norm": 0.7336506247520447, + "learning_rate": 2.8338482820063422e-05, + "loss": 0.13720703125, + "step": 2673 + }, + { + "epoch": 0.18074895227795051, + "grad_norm": 0.632321834564209, + "learning_rate": 2.8336973107058703e-05, + "loss": 0.150482177734375, + "step": 2674 + }, + { + "epoch": 0.18081654724888468, + "grad_norm": 1.2509410381317139, + "learning_rate": 2.833546274872828e-05, + "loss": 0.274505615234375, + "step": 2675 + }, + { + "epoch": 0.18088414221981886, + "grad_norm": 1.1313482522964478, + "learning_rate": 2.833395174514524e-05, + "loss": 0.120819091796875, + "step": 2676 + }, + { + "epoch": 0.180951737190753, + "grad_norm": 0.649752140045166, + "learning_rate": 2.833244009638269e-05, + "loss": 0.071685791015625, + "step": 2677 + }, + { + "epoch": 0.18101933216168717, + "grad_norm": 1.4331659078598022, + "learning_rate": 2.8330927802513774e-05, + "loss": 0.118621826171875, + "step": 2678 + }, + { + "epoch": 0.18108692713262134, + "grad_norm": 0.9556083679199219, + "learning_rate": 2.8329414863611667e-05, + "loss": 0.20172119140625, + "step": 2679 + }, + { + "epoch": 0.18115452210355548, + "grad_norm": 1.1069291830062866, + "learning_rate": 2.8327901279749575e-05, + "loss": 0.12920379638671875, + "step": 2680 + }, + { + "epoch": 0.18122211707448965, + "grad_norm": 1.1290264129638672, + "learning_rate": 2.8326387051000736e-05, + "loss": 0.1331024169921875, + "step": 2681 + }, + { + "epoch": 0.18128971204542382, + "grad_norm": 0.30562278628349304, + "learning_rate": 2.832487217743841e-05, + "loss": 0.046295166015625, + "step": 2682 + }, + { + "epoch": 0.181357307016358, + "grad_norm": 1.576183557510376, + "learning_rate": 2.8323356659135903e-05, + "loss": 0.26251220703125, + "step": 2683 + }, + { + "epoch": 0.18142490198729214, + "grad_norm": 1.6176811456680298, + "learning_rate": 2.8321840496166547e-05, + "loss": 0.1767120361328125, + "step": 2684 + }, + { + "epoch": 0.1814924969582263, + "grad_norm": 0.7579689621925354, + "learning_rate": 2.8320323688603698e-05, + "loss": 0.08283805847167969, + "step": 2685 + }, + { + "epoch": 0.18156009192916048, + "grad_norm": 2.577871561050415, + "learning_rate": 2.831880623652075e-05, + "loss": 0.202239990234375, + "step": 2686 + }, + { + "epoch": 0.18162768690009462, + "grad_norm": 0.617362916469574, + "learning_rate": 2.831728813999113e-05, + "loss": 0.065521240234375, + "step": 2687 + }, + { + "epoch": 0.1816952818710288, + "grad_norm": 0.6718600392341614, + "learning_rate": 2.8315769399088287e-05, + "loss": 0.12067794799804688, + "step": 2688 + }, + { + "epoch": 0.18176287684196296, + "grad_norm": 0.44866397976875305, + "learning_rate": 2.8314250013885713e-05, + "loss": 0.08147430419921875, + "step": 2689 + }, + { + "epoch": 0.18183047181289713, + "grad_norm": 0.17214636504650116, + "learning_rate": 2.8312729984456925e-05, + "loss": 0.031375885009765625, + "step": 2690 + }, + { + "epoch": 0.18189806678383127, + "grad_norm": 0.9193543195724487, + "learning_rate": 2.8311209310875466e-05, + "loss": 0.15177154541015625, + "step": 2691 + }, + { + "epoch": 0.18196566175476545, + "grad_norm": 0.32428833842277527, + "learning_rate": 2.830968799321492e-05, + "loss": 0.046661376953125, + "step": 2692 + }, + { + "epoch": 0.18203325672569962, + "grad_norm": 1.503811240196228, + "learning_rate": 2.8308166031548896e-05, + "loss": 0.133392333984375, + "step": 2693 + }, + { + "epoch": 0.18210085169663376, + "grad_norm": 0.7290600538253784, + "learning_rate": 2.830664342595104e-05, + "loss": 0.1280517578125, + "step": 2694 + }, + { + "epoch": 0.18216844666756793, + "grad_norm": 0.34502893686294556, + "learning_rate": 2.8305120176495018e-05, + "loss": 0.06060028076171875, + "step": 2695 + }, + { + "epoch": 0.1822360416385021, + "grad_norm": 0.3365893065929413, + "learning_rate": 2.830359628325454e-05, + "loss": 0.03924560546875, + "step": 2696 + }, + { + "epoch": 0.18230363660943627, + "grad_norm": 0.6770533919334412, + "learning_rate": 2.830207174630334e-05, + "loss": 0.14452362060546875, + "step": 2697 + }, + { + "epoch": 0.1823712315803704, + "grad_norm": 0.7991174459457397, + "learning_rate": 2.8300546565715183e-05, + "loss": 0.177703857421875, + "step": 2698 + }, + { + "epoch": 0.18243882655130458, + "grad_norm": 0.9410693049430847, + "learning_rate": 2.8299020741563867e-05, + "loss": 0.171966552734375, + "step": 2699 + }, + { + "epoch": 0.18250642152223875, + "grad_norm": 1.1941800117492676, + "learning_rate": 2.8297494273923227e-05, + "loss": 0.1780242919921875, + "step": 2700 + }, + { + "epoch": 0.1825740164931729, + "grad_norm": 0.9848393797874451, + "learning_rate": 2.8295967162867113e-05, + "loss": 0.188323974609375, + "step": 2701 + }, + { + "epoch": 0.18264161146410707, + "grad_norm": 0.2876761555671692, + "learning_rate": 2.8294439408469423e-05, + "loss": 0.05744171142578125, + "step": 2702 + }, + { + "epoch": 0.18270920643504124, + "grad_norm": 0.9251553416252136, + "learning_rate": 2.8292911010804076e-05, + "loss": 0.133819580078125, + "step": 2703 + }, + { + "epoch": 0.1827768014059754, + "grad_norm": 0.9372506141662598, + "learning_rate": 2.8291381969945027e-05, + "loss": 0.152923583984375, + "step": 2704 + }, + { + "epoch": 0.18284439637690955, + "grad_norm": 0.8000345826148987, + "learning_rate": 2.828985228596626e-05, + "loss": 0.216278076171875, + "step": 2705 + }, + { + "epoch": 0.18291199134784372, + "grad_norm": 0.4019085764884949, + "learning_rate": 2.828832195894179e-05, + "loss": 0.093292236328125, + "step": 2706 + }, + { + "epoch": 0.1829795863187779, + "grad_norm": 1.0966920852661133, + "learning_rate": 2.8286790988945665e-05, + "loss": 0.26739501953125, + "step": 2707 + }, + { + "epoch": 0.18304718128971204, + "grad_norm": 0.32574495673179626, + "learning_rate": 2.8285259376051962e-05, + "loss": 0.07552337646484375, + "step": 2708 + }, + { + "epoch": 0.1831147762606462, + "grad_norm": 0.24490371346473694, + "learning_rate": 2.828372712033479e-05, + "loss": 0.050384521484375, + "step": 2709 + }, + { + "epoch": 0.18318237123158038, + "grad_norm": 1.6749708652496338, + "learning_rate": 2.8282194221868292e-05, + "loss": 0.1798095703125, + "step": 2710 + }, + { + "epoch": 0.18324996620251452, + "grad_norm": 0.8730593919754028, + "learning_rate": 2.828066068072663e-05, + "loss": 0.22442626953125, + "step": 2711 + }, + { + "epoch": 0.1833175611734487, + "grad_norm": 1.0404692888259888, + "learning_rate": 2.8279126496984018e-05, + "loss": 0.11472320556640625, + "step": 2712 + }, + { + "epoch": 0.18338515614438286, + "grad_norm": 0.49306848645210266, + "learning_rate": 2.8277591670714688e-05, + "loss": 0.08101272583007812, + "step": 2713 + }, + { + "epoch": 0.18345275111531703, + "grad_norm": 0.7291740775108337, + "learning_rate": 2.8276056201992894e-05, + "loss": 0.13650894165039062, + "step": 2714 + }, + { + "epoch": 0.18352034608625117, + "grad_norm": 0.3714842200279236, + "learning_rate": 2.8274520090892937e-05, + "loss": 0.05924224853515625, + "step": 2715 + }, + { + "epoch": 0.18358794105718534, + "grad_norm": 0.2982882559299469, + "learning_rate": 2.8272983337489146e-05, + "loss": 0.039585113525390625, + "step": 2716 + }, + { + "epoch": 0.18365553602811951, + "grad_norm": 0.5788488984107971, + "learning_rate": 2.827144594185588e-05, + "loss": 0.06266021728515625, + "step": 2717 + }, + { + "epoch": 0.18372313099905366, + "grad_norm": 0.427815318107605, + "learning_rate": 2.8269907904067526e-05, + "loss": 0.070404052734375, + "step": 2718 + }, + { + "epoch": 0.18379072596998783, + "grad_norm": 0.18904438614845276, + "learning_rate": 2.8268369224198502e-05, + "loss": 0.0257415771484375, + "step": 2719 + }, + { + "epoch": 0.183858320940922, + "grad_norm": 1.226590871810913, + "learning_rate": 2.8266829902323254e-05, + "loss": 0.1758270263671875, + "step": 2720 + }, + { + "epoch": 0.18392591591185617, + "grad_norm": 0.22583255171775818, + "learning_rate": 2.8265289938516275e-05, + "loss": 0.04299163818359375, + "step": 2721 + }, + { + "epoch": 0.1839935108827903, + "grad_norm": 0.6689764261245728, + "learning_rate": 2.8263749332852072e-05, + "loss": 0.07073211669921875, + "step": 2722 + }, + { + "epoch": 0.18406110585372448, + "grad_norm": 0.9727574586868286, + "learning_rate": 2.826220808540519e-05, + "loss": 0.183441162109375, + "step": 2723 + }, + { + "epoch": 0.18412870082465865, + "grad_norm": 0.4915236532688141, + "learning_rate": 2.8260666196250204e-05, + "loss": 0.06916046142578125, + "step": 2724 + }, + { + "epoch": 0.1841962957955928, + "grad_norm": 0.42398425936698914, + "learning_rate": 2.8259123665461723e-05, + "loss": 0.0824432373046875, + "step": 2725 + }, + { + "epoch": 0.18426389076652697, + "grad_norm": 0.44105029106140137, + "learning_rate": 2.8257580493114376e-05, + "loss": 0.08392333984375, + "step": 2726 + }, + { + "epoch": 0.18433148573746114, + "grad_norm": 0.5574656128883362, + "learning_rate": 2.8256036679282843e-05, + "loss": 0.1029510498046875, + "step": 2727 + }, + { + "epoch": 0.1843990807083953, + "grad_norm": 0.32173946499824524, + "learning_rate": 2.8254492224041814e-05, + "loss": 0.06233978271484375, + "step": 2728 + }, + { + "epoch": 0.18446667567932945, + "grad_norm": 0.7661775350570679, + "learning_rate": 2.825294712746602e-05, + "loss": 0.126708984375, + "step": 2729 + }, + { + "epoch": 0.18453427065026362, + "grad_norm": 1.134836196899414, + "learning_rate": 2.8251401389630227e-05, + "loss": 0.197418212890625, + "step": 2730 + }, + { + "epoch": 0.1846018656211978, + "grad_norm": 0.4423196315765381, + "learning_rate": 2.8249855010609225e-05, + "loss": 0.04698944091796875, + "step": 2731 + }, + { + "epoch": 0.18466946059213193, + "grad_norm": 0.291131854057312, + "learning_rate": 2.824830799047784e-05, + "loss": 0.0511016845703125, + "step": 2732 + }, + { + "epoch": 0.1847370555630661, + "grad_norm": 1.6360113620758057, + "learning_rate": 2.824676032931092e-05, + "loss": 0.14276123046875, + "step": 2733 + }, + { + "epoch": 0.18480465053400028, + "grad_norm": 0.5733312964439392, + "learning_rate": 2.824521202718336e-05, + "loss": 0.12314605712890625, + "step": 2734 + }, + { + "epoch": 0.18487224550493445, + "grad_norm": 1.3186084032058716, + "learning_rate": 2.824366308417006e-05, + "loss": 0.14581298828125, + "step": 2735 + }, + { + "epoch": 0.1849398404758686, + "grad_norm": 2.870043992996216, + "learning_rate": 2.8242113500345988e-05, + "loss": 0.22691726684570312, + "step": 2736 + }, + { + "epoch": 0.18500743544680276, + "grad_norm": 0.2960669696331024, + "learning_rate": 2.8240563275786112e-05, + "loss": 0.0648651123046875, + "step": 2737 + }, + { + "epoch": 0.18507503041773693, + "grad_norm": 0.27601000666618347, + "learning_rate": 2.8239012410565443e-05, + "loss": 0.04425048828125, + "step": 2738 + }, + { + "epoch": 0.18514262538867107, + "grad_norm": 0.6796697378158569, + "learning_rate": 2.8237460904759018e-05, + "loss": 0.10432243347167969, + "step": 2739 + }, + { + "epoch": 0.18521022035960524, + "grad_norm": 2.871088981628418, + "learning_rate": 2.8235908758441914e-05, + "loss": 0.32928466796875, + "step": 2740 + }, + { + "epoch": 0.1852778153305394, + "grad_norm": 0.5840921401977539, + "learning_rate": 2.8234355971689226e-05, + "loss": 0.0934906005859375, + "step": 2741 + }, + { + "epoch": 0.18534541030147356, + "grad_norm": 0.6112332344055176, + "learning_rate": 2.8232802544576097e-05, + "loss": 0.09563446044921875, + "step": 2742 + }, + { + "epoch": 0.18541300527240773, + "grad_norm": 0.3314615488052368, + "learning_rate": 2.8231248477177688e-05, + "loss": 0.06067657470703125, + "step": 2743 + }, + { + "epoch": 0.1854806002433419, + "grad_norm": 0.3827812969684601, + "learning_rate": 2.822969376956919e-05, + "loss": 0.08642578125, + "step": 2744 + }, + { + "epoch": 0.18554819521427607, + "grad_norm": 0.34818539023399353, + "learning_rate": 2.822813842182584e-05, + "loss": 0.06081390380859375, + "step": 2745 + }, + { + "epoch": 0.1856157901852102, + "grad_norm": 2.0407185554504395, + "learning_rate": 2.822658243402288e-05, + "loss": 0.237091064453125, + "step": 2746 + }, + { + "epoch": 0.18568338515614438, + "grad_norm": 0.7491267323493958, + "learning_rate": 2.8225025806235612e-05, + "loss": 0.1473541259765625, + "step": 2747 + }, + { + "epoch": 0.18575098012707855, + "grad_norm": 1.2147232294082642, + "learning_rate": 2.8223468538539344e-05, + "loss": 0.1529998779296875, + "step": 2748 + }, + { + "epoch": 0.1858185750980127, + "grad_norm": 0.47354191541671753, + "learning_rate": 2.8221910631009434e-05, + "loss": 0.0836029052734375, + "step": 2749 + }, + { + "epoch": 0.18588617006894687, + "grad_norm": 0.917874813079834, + "learning_rate": 2.822035208372127e-05, + "loss": 0.20428466796875, + "step": 2750 + }, + { + "epoch": 0.18595376503988104, + "grad_norm": 0.33157745003700256, + "learning_rate": 2.8218792896750247e-05, + "loss": 0.051647186279296875, + "step": 2751 + }, + { + "epoch": 0.1860213600108152, + "grad_norm": 0.3044157922267914, + "learning_rate": 2.8217233070171816e-05, + "loss": 0.0693206787109375, + "step": 2752 + }, + { + "epoch": 0.18608895498174935, + "grad_norm": 1.812117099761963, + "learning_rate": 2.8215672604061453e-05, + "loss": 0.20867919921875, + "step": 2753 + }, + { + "epoch": 0.18615654995268352, + "grad_norm": 0.23671402037143707, + "learning_rate": 2.821411149849467e-05, + "loss": 0.027078628540039062, + "step": 2754 + }, + { + "epoch": 0.1862241449236177, + "grad_norm": 0.8790021538734436, + "learning_rate": 2.8212549753546983e-05, + "loss": 0.1502227783203125, + "step": 2755 + }, + { + "epoch": 0.18629173989455183, + "grad_norm": 1.6231859922409058, + "learning_rate": 2.8210987369293977e-05, + "loss": 0.16033935546875, + "step": 2756 + }, + { + "epoch": 0.186359334865486, + "grad_norm": 1.122911810874939, + "learning_rate": 2.8209424345811247e-05, + "loss": 0.2371826171875, + "step": 2757 + }, + { + "epoch": 0.18642692983642017, + "grad_norm": 1.2879860401153564, + "learning_rate": 2.8207860683174415e-05, + "loss": 0.233642578125, + "step": 2758 + }, + { + "epoch": 0.18649452480735434, + "grad_norm": 0.4710730314254761, + "learning_rate": 2.8206296381459143e-05, + "loss": 0.082305908203125, + "step": 2759 + }, + { + "epoch": 0.1865621197782885, + "grad_norm": 0.5988683700561523, + "learning_rate": 2.820473144074113e-05, + "loss": 0.11187744140625, + "step": 2760 + }, + { + "epoch": 0.18662971474922266, + "grad_norm": 0.6023953557014465, + "learning_rate": 2.8203165861096078e-05, + "loss": 0.118743896484375, + "step": 2761 + }, + { + "epoch": 0.18669730972015683, + "grad_norm": 0.4228247404098511, + "learning_rate": 2.820159964259976e-05, + "loss": 0.06354141235351562, + "step": 2762 + }, + { + "epoch": 0.18676490469109097, + "grad_norm": 0.38556283712387085, + "learning_rate": 2.820003278532795e-05, + "loss": 0.07057952880859375, + "step": 2763 + }, + { + "epoch": 0.18683249966202514, + "grad_norm": 0.7824898362159729, + "learning_rate": 2.8198465289356462e-05, + "loss": 0.14733123779296875, + "step": 2764 + }, + { + "epoch": 0.1869000946329593, + "grad_norm": 0.28138163685798645, + "learning_rate": 2.819689715476115e-05, + "loss": 0.06805419921875, + "step": 2765 + }, + { + "epoch": 0.18696768960389348, + "grad_norm": 1.3033699989318848, + "learning_rate": 2.8195328381617878e-05, + "loss": 0.11346435546875, + "step": 2766 + }, + { + "epoch": 0.18703528457482763, + "grad_norm": 0.6534273028373718, + "learning_rate": 2.8193758970002557e-05, + "loss": 0.122100830078125, + "step": 2767 + }, + { + "epoch": 0.1871028795457618, + "grad_norm": 0.30544140934944153, + "learning_rate": 2.819218891999113e-05, + "loss": 0.04146766662597656, + "step": 2768 + }, + { + "epoch": 0.18717047451669597, + "grad_norm": 1.0785890817642212, + "learning_rate": 2.8190618231659557e-05, + "loss": 0.2003631591796875, + "step": 2769 + }, + { + "epoch": 0.1872380694876301, + "grad_norm": 0.3593985438346863, + "learning_rate": 2.8189046905083845e-05, + "loss": 0.08022308349609375, + "step": 2770 + }, + { + "epoch": 0.18730566445856428, + "grad_norm": 0.5689581632614136, + "learning_rate": 2.818747494034002e-05, + "loss": 0.07807540893554688, + "step": 2771 + }, + { + "epoch": 0.18737325942949845, + "grad_norm": 0.883661150932312, + "learning_rate": 2.8185902337504146e-05, + "loss": 0.18646240234375, + "step": 2772 + }, + { + "epoch": 0.18744085440043262, + "grad_norm": 0.7267542481422424, + "learning_rate": 2.8184329096652317e-05, + "loss": 0.12113189697265625, + "step": 2773 + }, + { + "epoch": 0.18750844937136676, + "grad_norm": 0.7307393550872803, + "learning_rate": 2.818275521786065e-05, + "loss": 0.15996551513671875, + "step": 2774 + }, + { + "epoch": 0.18757604434230093, + "grad_norm": 0.30206599831581116, + "learning_rate": 2.8181180701205308e-05, + "loss": 0.05406951904296875, + "step": 2775 + }, + { + "epoch": 0.1876436393132351, + "grad_norm": 0.38076815009117126, + "learning_rate": 2.817960554676247e-05, + "loss": 0.073089599609375, + "step": 2776 + }, + { + "epoch": 0.18771123428416925, + "grad_norm": 0.7420747876167297, + "learning_rate": 2.8178029754608347e-05, + "loss": 0.084716796875, + "step": 2777 + }, + { + "epoch": 0.18777882925510342, + "grad_norm": 0.4912596344947815, + "learning_rate": 2.8176453324819194e-05, + "loss": 0.06488800048828125, + "step": 2778 + }, + { + "epoch": 0.1878464242260376, + "grad_norm": 0.8168045878410339, + "learning_rate": 2.8174876257471285e-05, + "loss": 0.1399688720703125, + "step": 2779 + }, + { + "epoch": 0.18791401919697173, + "grad_norm": 1.3017029762268066, + "learning_rate": 2.8173298552640932e-05, + "loss": 0.208343505859375, + "step": 2780 + }, + { + "epoch": 0.1879816141679059, + "grad_norm": 1.2536793947219849, + "learning_rate": 2.817172021040447e-05, + "loss": 0.2042388916015625, + "step": 2781 + }, + { + "epoch": 0.18804920913884007, + "grad_norm": 0.9061697125434875, + "learning_rate": 2.8170141230838266e-05, + "loss": 0.1192169189453125, + "step": 2782 + }, + { + "epoch": 0.18811680410977424, + "grad_norm": 0.6753116846084595, + "learning_rate": 2.816856161401873e-05, + "loss": 0.0840911865234375, + "step": 2783 + }, + { + "epoch": 0.1881843990807084, + "grad_norm": 0.6355226039886475, + "learning_rate": 2.816698136002229e-05, + "loss": 0.1081390380859375, + "step": 2784 + }, + { + "epoch": 0.18825199405164256, + "grad_norm": 1.4275929927825928, + "learning_rate": 2.8165400468925405e-05, + "loss": 0.171112060546875, + "step": 2785 + }, + { + "epoch": 0.18831958902257673, + "grad_norm": 1.3709983825683594, + "learning_rate": 2.8163818940804565e-05, + "loss": 0.145233154296875, + "step": 2786 + }, + { + "epoch": 0.18838718399351087, + "grad_norm": 0.8417302370071411, + "learning_rate": 2.816223677573631e-05, + "loss": 0.09778594970703125, + "step": 2787 + }, + { + "epoch": 0.18845477896444504, + "grad_norm": 0.3473028242588043, + "learning_rate": 2.8160653973797175e-05, + "loss": 0.04540252685546875, + "step": 2788 + }, + { + "epoch": 0.1885223739353792, + "grad_norm": 0.9289171695709229, + "learning_rate": 2.815907053506376e-05, + "loss": 0.1611328125, + "step": 2789 + }, + { + "epoch": 0.18858996890631338, + "grad_norm": 0.6589667201042175, + "learning_rate": 2.815748645961268e-05, + "loss": 0.1355743408203125, + "step": 2790 + }, + { + "epoch": 0.18865756387724752, + "grad_norm": 1.5460929870605469, + "learning_rate": 2.8155901747520574e-05, + "loss": 0.16290283203125, + "step": 2791 + }, + { + "epoch": 0.1887251588481817, + "grad_norm": 0.4531334936618805, + "learning_rate": 2.8154316398864128e-05, + "loss": 0.07386016845703125, + "step": 2792 + }, + { + "epoch": 0.18879275381911587, + "grad_norm": 0.8176876306533813, + "learning_rate": 2.815273041372005e-05, + "loss": 0.187469482421875, + "step": 2793 + }, + { + "epoch": 0.18886034879005, + "grad_norm": 0.8999921083450317, + "learning_rate": 2.815114379216508e-05, + "loss": 0.12993240356445312, + "step": 2794 + }, + { + "epoch": 0.18892794376098418, + "grad_norm": 0.5266236066818237, + "learning_rate": 2.814955653427598e-05, + "loss": 0.0928497314453125, + "step": 2795 + }, + { + "epoch": 0.18899553873191835, + "grad_norm": 0.7256622314453125, + "learning_rate": 2.814796864012957e-05, + "loss": 0.16290283203125, + "step": 2796 + }, + { + "epoch": 0.18906313370285252, + "grad_norm": 0.30543768405914307, + "learning_rate": 2.814638010980266e-05, + "loss": 0.0656585693359375, + "step": 2797 + }, + { + "epoch": 0.18913072867378666, + "grad_norm": 0.2568742036819458, + "learning_rate": 2.8144790943372128e-05, + "loss": 0.038074493408203125, + "step": 2798 + }, + { + "epoch": 0.18919832364472083, + "grad_norm": 2.522318124771118, + "learning_rate": 2.8143201140914866e-05, + "loss": 0.17993927001953125, + "step": 2799 + }, + { + "epoch": 0.189265918615655, + "grad_norm": 0.9353402256965637, + "learning_rate": 2.8141610702507797e-05, + "loss": 0.1602783203125, + "step": 2800 + }, + { + "epoch": 0.18933351358658915, + "grad_norm": 1.096329927444458, + "learning_rate": 2.8140019628227868e-05, + "loss": 0.169036865234375, + "step": 2801 + }, + { + "epoch": 0.18940110855752332, + "grad_norm": 0.9330121874809265, + "learning_rate": 2.813842791815208e-05, + "loss": 0.14581298828125, + "step": 2802 + }, + { + "epoch": 0.1894687035284575, + "grad_norm": 1.3017652034759521, + "learning_rate": 2.813683557235744e-05, + "loss": 0.18682861328125, + "step": 2803 + }, + { + "epoch": 0.18953629849939166, + "grad_norm": 0.8354668021202087, + "learning_rate": 2.8135242590921e-05, + "loss": 0.1792144775390625, + "step": 2804 + }, + { + "epoch": 0.1896038934703258, + "grad_norm": 0.9768109321594238, + "learning_rate": 2.813364897391983e-05, + "loss": 0.15126800537109375, + "step": 2805 + }, + { + "epoch": 0.18967148844125997, + "grad_norm": 0.2769661545753479, + "learning_rate": 2.8132054721431045e-05, + "loss": 0.046783447265625, + "step": 2806 + }, + { + "epoch": 0.18973908341219414, + "grad_norm": 0.9431861042976379, + "learning_rate": 2.8130459833531793e-05, + "loss": 0.188507080078125, + "step": 2807 + }, + { + "epoch": 0.18980667838312829, + "grad_norm": 0.4199741780757904, + "learning_rate": 2.812886431029923e-05, + "loss": 0.0526123046875, + "step": 2808 + }, + { + "epoch": 0.18987427335406246, + "grad_norm": 0.5806202292442322, + "learning_rate": 2.812726815181057e-05, + "loss": 0.12744903564453125, + "step": 2809 + }, + { + "epoch": 0.18994186832499663, + "grad_norm": 2.9831552505493164, + "learning_rate": 2.8125671358143035e-05, + "loss": 0.276275634765625, + "step": 2810 + }, + { + "epoch": 0.19000946329593077, + "grad_norm": 1.401862382888794, + "learning_rate": 2.8124073929373898e-05, + "loss": 0.140289306640625, + "step": 2811 + }, + { + "epoch": 0.19007705826686494, + "grad_norm": 0.45540133118629456, + "learning_rate": 2.8122475865580437e-05, + "loss": 0.0519866943359375, + "step": 2812 + }, + { + "epoch": 0.1901446532377991, + "grad_norm": 0.23712879419326782, + "learning_rate": 2.8120877166839992e-05, + "loss": 0.0442047119140625, + "step": 2813 + }, + { + "epoch": 0.19021224820873328, + "grad_norm": 1.5406016111373901, + "learning_rate": 2.811927783322991e-05, + "loss": 0.19970703125, + "step": 2814 + }, + { + "epoch": 0.19027984317966742, + "grad_norm": 0.4917134642601013, + "learning_rate": 2.811767786482758e-05, + "loss": 0.1029815673828125, + "step": 2815 + }, + { + "epoch": 0.1903474381506016, + "grad_norm": 0.5836704969406128, + "learning_rate": 2.8116077261710413e-05, + "loss": 0.0988006591796875, + "step": 2816 + }, + { + "epoch": 0.19041503312153576, + "grad_norm": 0.37548166513442993, + "learning_rate": 2.8114476023955865e-05, + "loss": 0.0512847900390625, + "step": 2817 + }, + { + "epoch": 0.1904826280924699, + "grad_norm": 1.0470856428146362, + "learning_rate": 2.8112874151641407e-05, + "loss": 0.18548583984375, + "step": 2818 + }, + { + "epoch": 0.19055022306340408, + "grad_norm": 0.7917494177818298, + "learning_rate": 2.811127164484455e-05, + "loss": 0.1536865234375, + "step": 2819 + }, + { + "epoch": 0.19061781803433825, + "grad_norm": 0.7561525702476501, + "learning_rate": 2.810966850364283e-05, + "loss": 0.1343536376953125, + "step": 2820 + }, + { + "epoch": 0.19068541300527242, + "grad_norm": 0.319332093000412, + "learning_rate": 2.8108064728113825e-05, + "loss": 0.0626678466796875, + "step": 2821 + }, + { + "epoch": 0.19075300797620656, + "grad_norm": 0.6892013549804688, + "learning_rate": 2.8106460318335124e-05, + "loss": 0.1307525634765625, + "step": 2822 + }, + { + "epoch": 0.19082060294714073, + "grad_norm": 1.043141484260559, + "learning_rate": 2.810485527438437e-05, + "loss": 0.133026123046875, + "step": 2823 + }, + { + "epoch": 0.1908881979180749, + "grad_norm": 0.832349419593811, + "learning_rate": 2.8103249596339214e-05, + "loss": 0.1361083984375, + "step": 2824 + }, + { + "epoch": 0.19095579288900905, + "grad_norm": 0.6996049880981445, + "learning_rate": 2.810164328427736e-05, + "loss": 0.09228515625, + "step": 2825 + }, + { + "epoch": 0.19102338785994322, + "grad_norm": 0.9055985808372498, + "learning_rate": 2.810003633827652e-05, + "loss": 0.22540283203125, + "step": 2826 + }, + { + "epoch": 0.1910909828308774, + "grad_norm": 2.0021252632141113, + "learning_rate": 2.8098428758414458e-05, + "loss": 0.1844329833984375, + "step": 2827 + }, + { + "epoch": 0.19115857780181156, + "grad_norm": 0.6353436708450317, + "learning_rate": 2.8096820544768955e-05, + "loss": 0.11151885986328125, + "step": 2828 + }, + { + "epoch": 0.1912261727727457, + "grad_norm": 1.2581819295883179, + "learning_rate": 2.8095211697417823e-05, + "loss": 0.2371826171875, + "step": 2829 + }, + { + "epoch": 0.19129376774367987, + "grad_norm": 0.39188939332962036, + "learning_rate": 2.8093602216438914e-05, + "loss": 0.0804901123046875, + "step": 2830 + }, + { + "epoch": 0.19136136271461404, + "grad_norm": 0.47446146607398987, + "learning_rate": 2.8091992101910094e-05, + "loss": 0.10833740234375, + "step": 2831 + }, + { + "epoch": 0.19142895768554818, + "grad_norm": 0.8261989951133728, + "learning_rate": 2.809038135390929e-05, + "loss": 0.1522674560546875, + "step": 2832 + }, + { + "epoch": 0.19149655265648236, + "grad_norm": 0.913982629776001, + "learning_rate": 2.808876997251442e-05, + "loss": 0.14608001708984375, + "step": 2833 + }, + { + "epoch": 0.19156414762741653, + "grad_norm": 0.9417384266853333, + "learning_rate": 2.808715795780346e-05, + "loss": 0.229156494140625, + "step": 2834 + }, + { + "epoch": 0.1916317425983507, + "grad_norm": 0.7514187693595886, + "learning_rate": 2.808554530985441e-05, + "loss": 0.13122844696044922, + "step": 2835 + }, + { + "epoch": 0.19169933756928484, + "grad_norm": 0.28768935799598694, + "learning_rate": 2.80839320287453e-05, + "loss": 0.05435943603515625, + "step": 2836 + }, + { + "epoch": 0.191766932540219, + "grad_norm": 0.5145201086997986, + "learning_rate": 2.8082318114554192e-05, + "loss": 0.10535430908203125, + "step": 2837 + }, + { + "epoch": 0.19183452751115318, + "grad_norm": 0.5330971479415894, + "learning_rate": 2.8080703567359177e-05, + "loss": 0.07876205444335938, + "step": 2838 + }, + { + "epoch": 0.19190212248208732, + "grad_norm": 0.49194416403770447, + "learning_rate": 2.8079088387238375e-05, + "loss": 0.07155609130859375, + "step": 2839 + }, + { + "epoch": 0.1919697174530215, + "grad_norm": 1.8074434995651245, + "learning_rate": 2.8077472574269934e-05, + "loss": 0.1910400390625, + "step": 2840 + }, + { + "epoch": 0.19203731242395566, + "grad_norm": 1.7718993425369263, + "learning_rate": 2.807585612853205e-05, + "loss": 0.1451873779296875, + "step": 2841 + }, + { + "epoch": 0.1921049073948898, + "grad_norm": 0.8072692155838013, + "learning_rate": 2.8074239050102923e-05, + "loss": 0.14786529541015625, + "step": 2842 + }, + { + "epoch": 0.19217250236582398, + "grad_norm": 0.9575350880622864, + "learning_rate": 2.80726213390608e-05, + "loss": 0.23828125, + "step": 2843 + }, + { + "epoch": 0.19224009733675815, + "grad_norm": 0.34975147247314453, + "learning_rate": 2.8071002995483967e-05, + "loss": 0.0616455078125, + "step": 2844 + }, + { + "epoch": 0.19230769230769232, + "grad_norm": 1.5568853616714478, + "learning_rate": 2.8069384019450715e-05, + "loss": 0.202117919921875, + "step": 2845 + }, + { + "epoch": 0.19237528727862646, + "grad_norm": 0.8236450552940369, + "learning_rate": 2.8067764411039388e-05, + "loss": 0.15826416015625, + "step": 2846 + }, + { + "epoch": 0.19244288224956063, + "grad_norm": 0.8248158693313599, + "learning_rate": 2.8066144170328354e-05, + "loss": 0.121673583984375, + "step": 2847 + }, + { + "epoch": 0.1925104772204948, + "grad_norm": 0.8230424523353577, + "learning_rate": 2.8064523297396004e-05, + "loss": 0.12656402587890625, + "step": 2848 + }, + { + "epoch": 0.19257807219142895, + "grad_norm": 0.5142787098884583, + "learning_rate": 2.8062901792320773e-05, + "loss": 0.098907470703125, + "step": 2849 + }, + { + "epoch": 0.19264566716236312, + "grad_norm": 0.569058895111084, + "learning_rate": 2.8061279655181114e-05, + "loss": 0.115264892578125, + "step": 2850 + }, + { + "epoch": 0.1927132621332973, + "grad_norm": 0.9742226600646973, + "learning_rate": 2.805965688605552e-05, + "loss": 0.1927032470703125, + "step": 2851 + }, + { + "epoch": 0.19278085710423146, + "grad_norm": 0.28810739517211914, + "learning_rate": 2.8058033485022502e-05, + "loss": 0.04448699951171875, + "step": 2852 + }, + { + "epoch": 0.1928484520751656, + "grad_norm": 1.082694172859192, + "learning_rate": 2.8056409452160627e-05, + "loss": 0.2052001953125, + "step": 2853 + }, + { + "epoch": 0.19291604704609977, + "grad_norm": 0.4806605279445648, + "learning_rate": 2.8054784787548457e-05, + "loss": 0.05328369140625, + "step": 2854 + }, + { + "epoch": 0.19298364201703394, + "grad_norm": 0.6307491660118103, + "learning_rate": 2.8053159491264617e-05, + "loss": 0.11708831787109375, + "step": 2855 + }, + { + "epoch": 0.19305123698796808, + "grad_norm": 0.3054559528827667, + "learning_rate": 2.805153356338774e-05, + "loss": 0.05960845947265625, + "step": 2856 + }, + { + "epoch": 0.19311883195890225, + "grad_norm": 1.4670734405517578, + "learning_rate": 2.8049907003996504e-05, + "loss": 0.216705322265625, + "step": 2857 + }, + { + "epoch": 0.19318642692983642, + "grad_norm": 1.2775678634643555, + "learning_rate": 2.8048279813169613e-05, + "loss": 0.15863037109375, + "step": 2858 + }, + { + "epoch": 0.1932540219007706, + "grad_norm": 0.6060720682144165, + "learning_rate": 2.8046651990985796e-05, + "loss": 0.10914993286132812, + "step": 2859 + }, + { + "epoch": 0.19332161687170474, + "grad_norm": 0.3234412968158722, + "learning_rate": 2.804502353752382e-05, + "loss": 0.05609130859375, + "step": 2860 + }, + { + "epoch": 0.1933892118426389, + "grad_norm": 0.7513430714607239, + "learning_rate": 2.8043394452862482e-05, + "loss": 0.1409149169921875, + "step": 2861 + }, + { + "epoch": 0.19345680681357308, + "grad_norm": 1.0413551330566406, + "learning_rate": 2.8041764737080605e-05, + "loss": 0.229705810546875, + "step": 2862 + }, + { + "epoch": 0.19352440178450722, + "grad_norm": 0.18958882987499237, + "learning_rate": 2.804013439025704e-05, + "loss": 0.0379180908203125, + "step": 2863 + }, + { + "epoch": 0.1935919967554414, + "grad_norm": 0.3478553891181946, + "learning_rate": 2.8038503412470677e-05, + "loss": 0.0623931884765625, + "step": 2864 + }, + { + "epoch": 0.19365959172637556, + "grad_norm": 0.7979618906974792, + "learning_rate": 2.8036871803800433e-05, + "loss": 0.0991973876953125, + "step": 2865 + }, + { + "epoch": 0.19372718669730973, + "grad_norm": 0.926528811454773, + "learning_rate": 2.803523956432526e-05, + "loss": 0.09420013427734375, + "step": 2866 + }, + { + "epoch": 0.19379478166824388, + "grad_norm": 0.3139803409576416, + "learning_rate": 2.803360669412413e-05, + "loss": 0.04143524169921875, + "step": 2867 + }, + { + "epoch": 0.19386237663917805, + "grad_norm": 0.8273432850837708, + "learning_rate": 2.803197319327605e-05, + "loss": 0.14284896850585938, + "step": 2868 + }, + { + "epoch": 0.19392997161011222, + "grad_norm": 0.5852710008621216, + "learning_rate": 2.8030339061860068e-05, + "loss": 0.0992584228515625, + "step": 2869 + }, + { + "epoch": 0.19399756658104636, + "grad_norm": 0.438171923160553, + "learning_rate": 2.802870429995524e-05, + "loss": 0.0775146484375, + "step": 2870 + }, + { + "epoch": 0.19406516155198053, + "grad_norm": 0.365362286567688, + "learning_rate": 2.802706890764068e-05, + "loss": 0.07982635498046875, + "step": 2871 + }, + { + "epoch": 0.1941327565229147, + "grad_norm": 0.9503708481788635, + "learning_rate": 2.802543288499551e-05, + "loss": 0.1428985595703125, + "step": 2872 + }, + { + "epoch": 0.19420035149384884, + "grad_norm": 0.7592220902442932, + "learning_rate": 2.8023796232098886e-05, + "loss": 0.1609039306640625, + "step": 2873 + }, + { + "epoch": 0.19426794646478301, + "grad_norm": 0.5458354353904724, + "learning_rate": 2.8022158949030015e-05, + "loss": 0.11128997802734375, + "step": 2874 + }, + { + "epoch": 0.19433554143571719, + "grad_norm": 0.6208927035331726, + "learning_rate": 2.8020521035868103e-05, + "loss": 0.08568572998046875, + "step": 2875 + }, + { + "epoch": 0.19440313640665136, + "grad_norm": 0.7403990030288696, + "learning_rate": 2.8018882492692416e-05, + "loss": 0.131500244140625, + "step": 2876 + }, + { + "epoch": 0.1944707313775855, + "grad_norm": 1.4925041198730469, + "learning_rate": 2.801724331958223e-05, + "loss": 0.1778564453125, + "step": 2877 + }, + { + "epoch": 0.19453832634851967, + "grad_norm": 0.513454794883728, + "learning_rate": 2.8015603516616855e-05, + "loss": 0.0567474365234375, + "step": 2878 + }, + { + "epoch": 0.19460592131945384, + "grad_norm": 0.9903881549835205, + "learning_rate": 2.801396308387564e-05, + "loss": 0.1727294921875, + "step": 2879 + }, + { + "epoch": 0.19467351629038798, + "grad_norm": 1.594277262687683, + "learning_rate": 2.801232202143796e-05, + "loss": 0.1898193359375, + "step": 2880 + }, + { + "epoch": 0.19474111126132215, + "grad_norm": 0.5810085535049438, + "learning_rate": 2.8010680329383213e-05, + "loss": 0.1338653564453125, + "step": 2881 + }, + { + "epoch": 0.19480870623225632, + "grad_norm": 0.8080867528915405, + "learning_rate": 2.8009038007790843e-05, + "loss": 0.1284027099609375, + "step": 2882 + }, + { + "epoch": 0.1948763012031905, + "grad_norm": 0.3432023227214813, + "learning_rate": 2.800739505674031e-05, + "loss": 0.054046630859375, + "step": 2883 + }, + { + "epoch": 0.19494389617412464, + "grad_norm": 0.7777019143104553, + "learning_rate": 2.8005751476311114e-05, + "loss": 0.134857177734375, + "step": 2884 + }, + { + "epoch": 0.1950114911450588, + "grad_norm": 1.3524911403656006, + "learning_rate": 2.8004107266582777e-05, + "loss": 0.1396331787109375, + "step": 2885 + }, + { + "epoch": 0.19507908611599298, + "grad_norm": 0.5758187174797058, + "learning_rate": 2.800246242763486e-05, + "loss": 0.094085693359375, + "step": 2886 + }, + { + "epoch": 0.19514668108692712, + "grad_norm": 0.47332391142845154, + "learning_rate": 2.8000816959546954e-05, + "loss": 0.10526275634765625, + "step": 2887 + }, + { + "epoch": 0.1952142760578613, + "grad_norm": 0.3667571544647217, + "learning_rate": 2.7999170862398663e-05, + "loss": 0.06671142578125, + "step": 2888 + }, + { + "epoch": 0.19528187102879546, + "grad_norm": 2.553595542907715, + "learning_rate": 2.7997524136269654e-05, + "loss": 0.249542236328125, + "step": 2889 + }, + { + "epoch": 0.19534946599972963, + "grad_norm": 0.9393971562385559, + "learning_rate": 2.7995876781239594e-05, + "loss": 0.236724853515625, + "step": 2890 + }, + { + "epoch": 0.19541706097066378, + "grad_norm": 0.6540178656578064, + "learning_rate": 2.799422879738819e-05, + "loss": 0.1140899658203125, + "step": 2891 + }, + { + "epoch": 0.19548465594159795, + "grad_norm": 0.6477789878845215, + "learning_rate": 2.7992580184795188e-05, + "loss": 0.1411590576171875, + "step": 2892 + }, + { + "epoch": 0.19555225091253212, + "grad_norm": 2.86308217048645, + "learning_rate": 2.7990930943540363e-05, + "loss": 0.26495361328125, + "step": 2893 + }, + { + "epoch": 0.19561984588346626, + "grad_norm": 0.4627821445465088, + "learning_rate": 2.7989281073703502e-05, + "loss": 0.072845458984375, + "step": 2894 + }, + { + "epoch": 0.19568744085440043, + "grad_norm": 1.5744062662124634, + "learning_rate": 2.7987630575364448e-05, + "loss": 0.31414794921875, + "step": 2895 + }, + { + "epoch": 0.1957550358253346, + "grad_norm": 0.4284208416938782, + "learning_rate": 2.798597944860305e-05, + "loss": 0.06024169921875, + "step": 2896 + }, + { + "epoch": 0.19582263079626877, + "grad_norm": 0.39138203859329224, + "learning_rate": 2.798432769349922e-05, + "loss": 0.06308746337890625, + "step": 2897 + }, + { + "epoch": 0.1958902257672029, + "grad_norm": 0.3585103750228882, + "learning_rate": 2.7982675310132857e-05, + "loss": 0.05068206787109375, + "step": 2898 + }, + { + "epoch": 0.19595782073813708, + "grad_norm": 1.1100420951843262, + "learning_rate": 2.798102229858393e-05, + "loss": 0.162200927734375, + "step": 2899 + }, + { + "epoch": 0.19602541570907125, + "grad_norm": 0.5377454161643982, + "learning_rate": 2.7979368658932413e-05, + "loss": 0.0774993896484375, + "step": 2900 + }, + { + "epoch": 0.1960930106800054, + "grad_norm": 0.7198811769485474, + "learning_rate": 2.797771439125832e-05, + "loss": 0.16448974609375, + "step": 2901 + }, + { + "epoch": 0.19616060565093957, + "grad_norm": 0.42162027955055237, + "learning_rate": 2.79760594956417e-05, + "loss": 0.07456207275390625, + "step": 2902 + }, + { + "epoch": 0.19622820062187374, + "grad_norm": 0.8910574913024902, + "learning_rate": 2.7974403972162627e-05, + "loss": 0.147979736328125, + "step": 2903 + }, + { + "epoch": 0.19629579559280788, + "grad_norm": 0.3845575749874115, + "learning_rate": 2.7972747820901197e-05, + "loss": 0.06617355346679688, + "step": 2904 + }, + { + "epoch": 0.19636339056374205, + "grad_norm": 0.7746375203132629, + "learning_rate": 2.7971091041937553e-05, + "loss": 0.1569671630859375, + "step": 2905 + }, + { + "epoch": 0.19643098553467622, + "grad_norm": 0.8139361143112183, + "learning_rate": 2.7969433635351862e-05, + "loss": 0.12886810302734375, + "step": 2906 + }, + { + "epoch": 0.1964985805056104, + "grad_norm": 0.6551030874252319, + "learning_rate": 2.796777560122431e-05, + "loss": 0.08069610595703125, + "step": 2907 + }, + { + "epoch": 0.19656617547654454, + "grad_norm": 0.39828771352767944, + "learning_rate": 2.7966116939635133e-05, + "loss": 0.061389923095703125, + "step": 2908 + }, + { + "epoch": 0.1966337704474787, + "grad_norm": 0.8756114840507507, + "learning_rate": 2.7964457650664583e-05, + "loss": 0.157562255859375, + "step": 2909 + }, + { + "epoch": 0.19670136541841288, + "grad_norm": 1.4849371910095215, + "learning_rate": 2.7962797734392942e-05, + "loss": 0.2109222412109375, + "step": 2910 + }, + { + "epoch": 0.19676896038934702, + "grad_norm": 0.5050774216651917, + "learning_rate": 2.796113719090054e-05, + "loss": 0.07825469970703125, + "step": 2911 + }, + { + "epoch": 0.1968365553602812, + "grad_norm": 0.39509162306785583, + "learning_rate": 2.795947602026771e-05, + "loss": 0.09323883056640625, + "step": 2912 + }, + { + "epoch": 0.19690415033121536, + "grad_norm": 0.1949889361858368, + "learning_rate": 2.7957814222574834e-05, + "loss": 0.02544403076171875, + "step": 2913 + }, + { + "epoch": 0.19697174530214953, + "grad_norm": 1.634000539779663, + "learning_rate": 2.7956151797902327e-05, + "loss": 0.20965576171875, + "step": 2914 + }, + { + "epoch": 0.19703934027308367, + "grad_norm": 1.6070812940597534, + "learning_rate": 2.795448874633062e-05, + "loss": 0.2646484375, + "step": 2915 + }, + { + "epoch": 0.19710693524401784, + "grad_norm": 0.6060361862182617, + "learning_rate": 2.795282506794019e-05, + "loss": 0.114654541015625, + "step": 2916 + }, + { + "epoch": 0.19717453021495202, + "grad_norm": 0.41550567746162415, + "learning_rate": 2.7951160762811524e-05, + "loss": 0.09209442138671875, + "step": 2917 + }, + { + "epoch": 0.19724212518588616, + "grad_norm": 0.8539574146270752, + "learning_rate": 2.7949495831025156e-05, + "loss": 0.1462860107421875, + "step": 2918 + }, + { + "epoch": 0.19730972015682033, + "grad_norm": 0.4627768099308014, + "learning_rate": 2.7947830272661656e-05, + "loss": 0.08098602294921875, + "step": 2919 + }, + { + "epoch": 0.1973773151277545, + "grad_norm": 0.7005521059036255, + "learning_rate": 2.79461640878016e-05, + "loss": 0.140716552734375, + "step": 2920 + }, + { + "epoch": 0.19744491009868867, + "grad_norm": 1.0427607297897339, + "learning_rate": 2.7944497276525613e-05, + "loss": 0.180328369140625, + "step": 2921 + }, + { + "epoch": 0.1975125050696228, + "grad_norm": 0.32112064957618713, + "learning_rate": 2.7942829838914352e-05, + "loss": 0.04885101318359375, + "step": 2922 + }, + { + "epoch": 0.19758010004055698, + "grad_norm": 0.6727070808410645, + "learning_rate": 2.794116177504849e-05, + "loss": 0.1573944091796875, + "step": 2923 + }, + { + "epoch": 0.19764769501149115, + "grad_norm": 0.5832932591438293, + "learning_rate": 2.7939493085008738e-05, + "loss": 0.131072998046875, + "step": 2924 + }, + { + "epoch": 0.1977152899824253, + "grad_norm": 0.7332266569137573, + "learning_rate": 2.7937823768875847e-05, + "loss": 0.086181640625, + "step": 2925 + }, + { + "epoch": 0.19778288495335947, + "grad_norm": 0.9028509259223938, + "learning_rate": 2.7936153826730577e-05, + "loss": 0.2196044921875, + "step": 2926 + }, + { + "epoch": 0.19785047992429364, + "grad_norm": 0.4790060818195343, + "learning_rate": 2.793448325865374e-05, + "loss": 0.09986114501953125, + "step": 2927 + }, + { + "epoch": 0.1979180748952278, + "grad_norm": 0.9004369378089905, + "learning_rate": 2.793281206472616e-05, + "loss": 0.1218109130859375, + "step": 2928 + }, + { + "epoch": 0.19798566986616195, + "grad_norm": 0.9940299391746521, + "learning_rate": 2.7931140245028705e-05, + "loss": 0.1904144287109375, + "step": 2929 + }, + { + "epoch": 0.19805326483709612, + "grad_norm": 0.7000892758369446, + "learning_rate": 2.792946779964227e-05, + "loss": 0.1602630615234375, + "step": 2930 + }, + { + "epoch": 0.1981208598080303, + "grad_norm": 0.6274284720420837, + "learning_rate": 2.7927794728647774e-05, + "loss": 0.0940399169921875, + "step": 2931 + }, + { + "epoch": 0.19818845477896443, + "grad_norm": 1.930929183959961, + "learning_rate": 2.7926121032126172e-05, + "loss": 0.2587890625, + "step": 2932 + }, + { + "epoch": 0.1982560497498986, + "grad_norm": 0.4134771525859833, + "learning_rate": 2.7924446710158446e-05, + "loss": 0.072967529296875, + "step": 2933 + }, + { + "epoch": 0.19832364472083278, + "grad_norm": 0.44909942150115967, + "learning_rate": 2.7922771762825618e-05, + "loss": 0.102508544921875, + "step": 2934 + }, + { + "epoch": 0.19839123969176695, + "grad_norm": 0.4963376224040985, + "learning_rate": 2.7921096190208718e-05, + "loss": 0.0909881591796875, + "step": 2935 + }, + { + "epoch": 0.1984588346627011, + "grad_norm": 0.5544523000717163, + "learning_rate": 2.7919419992388833e-05, + "loss": 0.12334632873535156, + "step": 2936 + }, + { + "epoch": 0.19852642963363526, + "grad_norm": 0.2709232270717621, + "learning_rate": 2.7917743169447064e-05, + "loss": 0.037078857421875, + "step": 2937 + }, + { + "epoch": 0.19859402460456943, + "grad_norm": 0.5621365308761597, + "learning_rate": 2.7916065721464544e-05, + "loss": 0.118927001953125, + "step": 2938 + }, + { + "epoch": 0.19866161957550357, + "grad_norm": 0.7737041115760803, + "learning_rate": 2.7914387648522444e-05, + "loss": 0.1585845947265625, + "step": 2939 + }, + { + "epoch": 0.19872921454643774, + "grad_norm": 1.3746598958969116, + "learning_rate": 2.791270895070195e-05, + "loss": 0.194610595703125, + "step": 2940 + }, + { + "epoch": 0.19879680951737191, + "grad_norm": 2.2112996578216553, + "learning_rate": 2.7911029628084295e-05, + "loss": 0.18389892578125, + "step": 2941 + }, + { + "epoch": 0.19886440448830606, + "grad_norm": 0.33080026507377625, + "learning_rate": 2.790934968075074e-05, + "loss": 0.0667877197265625, + "step": 2942 + }, + { + "epoch": 0.19893199945924023, + "grad_norm": 0.36736807227134705, + "learning_rate": 2.790766910878256e-05, + "loss": 0.05571746826171875, + "step": 2943 + }, + { + "epoch": 0.1989995944301744, + "grad_norm": 0.7718450427055359, + "learning_rate": 2.7905987912261075e-05, + "loss": 0.185302734375, + "step": 2944 + }, + { + "epoch": 0.19906718940110857, + "grad_norm": 0.4263206124305725, + "learning_rate": 2.7904306091267637e-05, + "loss": 0.0781707763671875, + "step": 2945 + }, + { + "epoch": 0.1991347843720427, + "grad_norm": 0.5089257955551147, + "learning_rate": 2.7902623645883618e-05, + "loss": 0.09772491455078125, + "step": 2946 + }, + { + "epoch": 0.19920237934297688, + "grad_norm": 0.21929335594177246, + "learning_rate": 2.7900940576190427e-05, + "loss": 0.03533172607421875, + "step": 2947 + }, + { + "epoch": 0.19926997431391105, + "grad_norm": 0.7091591954231262, + "learning_rate": 2.7899256882269503e-05, + "loss": 0.1398468017578125, + "step": 2948 + }, + { + "epoch": 0.1993375692848452, + "grad_norm": 1.4750854969024658, + "learning_rate": 2.7897572564202306e-05, + "loss": 0.1576385498046875, + "step": 2949 + }, + { + "epoch": 0.19940516425577937, + "grad_norm": 0.7469638586044312, + "learning_rate": 2.789588762207034e-05, + "loss": 0.140960693359375, + "step": 2950 + }, + { + "epoch": 0.19947275922671354, + "grad_norm": 1.1887171268463135, + "learning_rate": 2.7894202055955132e-05, + "loss": 0.19437026977539062, + "step": 2951 + }, + { + "epoch": 0.1995403541976477, + "grad_norm": 0.8362745046615601, + "learning_rate": 2.7892515865938247e-05, + "loss": 0.1431732177734375, + "step": 2952 + }, + { + "epoch": 0.19960794916858185, + "grad_norm": 0.3549063801765442, + "learning_rate": 2.7890829052101264e-05, + "loss": 0.046154022216796875, + "step": 2953 + }, + { + "epoch": 0.19967554413951602, + "grad_norm": 1.1666209697723389, + "learning_rate": 2.78891416145258e-05, + "loss": 0.11974334716796875, + "step": 2954 + }, + { + "epoch": 0.1997431391104502, + "grad_norm": 0.3669516444206238, + "learning_rate": 2.788745355329351e-05, + "loss": 0.062347412109375, + "step": 2955 + }, + { + "epoch": 0.19981073408138433, + "grad_norm": 0.6394319534301758, + "learning_rate": 2.7885764868486067e-05, + "loss": 0.076995849609375, + "step": 2956 + }, + { + "epoch": 0.1998783290523185, + "grad_norm": 0.7531644105911255, + "learning_rate": 2.788407556018519e-05, + "loss": 0.14501953125, + "step": 2957 + }, + { + "epoch": 0.19994592402325267, + "grad_norm": 0.39267945289611816, + "learning_rate": 2.788238562847261e-05, + "loss": 0.058773040771484375, + "step": 2958 + }, + { + "epoch": 0.20001351899418685, + "grad_norm": 0.685222864151001, + "learning_rate": 2.78806950734301e-05, + "loss": 0.186187744140625, + "step": 2959 + }, + { + "epoch": 0.200081113965121, + "grad_norm": 0.608122706413269, + "learning_rate": 2.787900389513946e-05, + "loss": 0.1426239013671875, + "step": 2960 + }, + { + "epoch": 0.20014870893605516, + "grad_norm": 0.5314895510673523, + "learning_rate": 2.7877312093682512e-05, + "loss": 0.07975006103515625, + "step": 2961 + }, + { + "epoch": 0.20021630390698933, + "grad_norm": 0.39442649483680725, + "learning_rate": 2.7875619669141127e-05, + "loss": 0.042102813720703125, + "step": 2962 + }, + { + "epoch": 0.20028389887792347, + "grad_norm": 0.37011027336120605, + "learning_rate": 2.7873926621597187e-05, + "loss": 0.054347991943359375, + "step": 2963 + }, + { + "epoch": 0.20035149384885764, + "grad_norm": 0.7387145161628723, + "learning_rate": 2.7872232951132614e-05, + "loss": 0.150421142578125, + "step": 2964 + }, + { + "epoch": 0.2004190888197918, + "grad_norm": 0.5823323726654053, + "learning_rate": 2.7870538657829362e-05, + "loss": 0.10396575927734375, + "step": 2965 + }, + { + "epoch": 0.20048668379072598, + "grad_norm": 0.5665686130523682, + "learning_rate": 2.786884374176941e-05, + "loss": 0.12812042236328125, + "step": 2966 + }, + { + "epoch": 0.20055427876166013, + "grad_norm": 0.4723433256149292, + "learning_rate": 2.786714820303476e-05, + "loss": 0.07774162292480469, + "step": 2967 + }, + { + "epoch": 0.2006218737325943, + "grad_norm": 0.30759063363075256, + "learning_rate": 2.786545204170747e-05, + "loss": 0.06351852416992188, + "step": 2968 + }, + { + "epoch": 0.20068946870352847, + "grad_norm": 1.3870058059692383, + "learning_rate": 2.7863755257869592e-05, + "loss": 0.11148834228515625, + "step": 2969 + }, + { + "epoch": 0.2007570636744626, + "grad_norm": 0.7986377477645874, + "learning_rate": 2.7862057851603237e-05, + "loss": 0.1605224609375, + "step": 2970 + }, + { + "epoch": 0.20082465864539678, + "grad_norm": 0.5286405086517334, + "learning_rate": 2.786035982299054e-05, + "loss": 0.08971405029296875, + "step": 2971 + }, + { + "epoch": 0.20089225361633095, + "grad_norm": 1.2306469678878784, + "learning_rate": 2.7858661172113654e-05, + "loss": 0.13268280029296875, + "step": 2972 + }, + { + "epoch": 0.2009598485872651, + "grad_norm": 0.21400049328804016, + "learning_rate": 2.785696189905477e-05, + "loss": 0.040637969970703125, + "step": 2973 + }, + { + "epoch": 0.20102744355819926, + "grad_norm": 0.7819842100143433, + "learning_rate": 2.7855262003896118e-05, + "loss": 0.161590576171875, + "step": 2974 + }, + { + "epoch": 0.20109503852913344, + "grad_norm": 0.5051897168159485, + "learning_rate": 2.785356148671994e-05, + "loss": 0.10885238647460938, + "step": 2975 + }, + { + "epoch": 0.2011626335000676, + "grad_norm": 2.231405019760132, + "learning_rate": 2.7851860347608526e-05, + "loss": 0.2802734375, + "step": 2976 + }, + { + "epoch": 0.20123022847100175, + "grad_norm": 0.7113054394721985, + "learning_rate": 2.7850158586644186e-05, + "loss": 0.16333770751953125, + "step": 2977 + }, + { + "epoch": 0.20129782344193592, + "grad_norm": 0.7876545190811157, + "learning_rate": 2.7848456203909252e-05, + "loss": 0.1613616943359375, + "step": 2978 + }, + { + "epoch": 0.2013654184128701, + "grad_norm": 0.5925140380859375, + "learning_rate": 2.7846753199486106e-05, + "loss": 0.1244354248046875, + "step": 2979 + }, + { + "epoch": 0.20143301338380423, + "grad_norm": 0.7331434488296509, + "learning_rate": 2.784504957345715e-05, + "loss": 0.12723541259765625, + "step": 2980 + }, + { + "epoch": 0.2015006083547384, + "grad_norm": 0.808071494102478, + "learning_rate": 2.784334532590481e-05, + "loss": 0.13401031494140625, + "step": 2981 + }, + { + "epoch": 0.20156820332567257, + "grad_norm": 2.506113052368164, + "learning_rate": 2.7841640456911555e-05, + "loss": 0.2320556640625, + "step": 2982 + }, + { + "epoch": 0.20163579829660674, + "grad_norm": 0.2717035412788391, + "learning_rate": 2.7839934966559864e-05, + "loss": 0.0545196533203125, + "step": 2983 + }, + { + "epoch": 0.2017033932675409, + "grad_norm": 0.3896689713001251, + "learning_rate": 2.783822885493228e-05, + "loss": 0.04048919677734375, + "step": 2984 + }, + { + "epoch": 0.20177098823847506, + "grad_norm": 1.6251367330551147, + "learning_rate": 2.783652212211134e-05, + "loss": 0.21429443359375, + "step": 2985 + }, + { + "epoch": 0.20183858320940923, + "grad_norm": 0.4630642235279083, + "learning_rate": 2.7834814768179623e-05, + "loss": 0.12203216552734375, + "step": 2986 + }, + { + "epoch": 0.20190617818034337, + "grad_norm": 0.34130147099494934, + "learning_rate": 2.783310679321976e-05, + "loss": 0.071807861328125, + "step": 2987 + }, + { + "epoch": 0.20197377315127754, + "grad_norm": 0.6968334317207336, + "learning_rate": 2.7831398197314374e-05, + "loss": 0.1488800048828125, + "step": 2988 + }, + { + "epoch": 0.2020413681222117, + "grad_norm": 1.1026360988616943, + "learning_rate": 2.782968898054615e-05, + "loss": 0.1419086456298828, + "step": 2989 + }, + { + "epoch": 0.20210896309314588, + "grad_norm": 1.4626296758651733, + "learning_rate": 2.782797914299778e-05, + "loss": 0.19683837890625, + "step": 2990 + }, + { + "epoch": 0.20217655806408003, + "grad_norm": 1.1670243740081787, + "learning_rate": 2.782626868475201e-05, + "loss": 0.234710693359375, + "step": 2991 + }, + { + "epoch": 0.2022441530350142, + "grad_norm": 0.4961417019367218, + "learning_rate": 2.7824557605891595e-05, + "loss": 0.08458709716796875, + "step": 2992 + }, + { + "epoch": 0.20231174800594837, + "grad_norm": 0.7455372214317322, + "learning_rate": 2.7822845906499326e-05, + "loss": 0.1003265380859375, + "step": 2993 + }, + { + "epoch": 0.2023793429768825, + "grad_norm": 0.4890083074569702, + "learning_rate": 2.7821133586658025e-05, + "loss": 0.1122283935546875, + "step": 2994 + }, + { + "epoch": 0.20244693794781668, + "grad_norm": 0.7251769304275513, + "learning_rate": 2.781942064645055e-05, + "loss": 0.13219451904296875, + "step": 2995 + }, + { + "epoch": 0.20251453291875085, + "grad_norm": 0.6146566867828369, + "learning_rate": 2.781770708595978e-05, + "loss": 0.14385223388671875, + "step": 2996 + }, + { + "epoch": 0.20258212788968502, + "grad_norm": 1.3464653491973877, + "learning_rate": 2.7815992905268628e-05, + "loss": 0.195220947265625, + "step": 2997 + }, + { + "epoch": 0.20264972286061916, + "grad_norm": 1.21747624874115, + "learning_rate": 2.781427810446004e-05, + "loss": 0.1565704345703125, + "step": 2998 + }, + { + "epoch": 0.20271731783155333, + "grad_norm": 0.31873247027397156, + "learning_rate": 2.7812562683616986e-05, + "loss": 0.053722381591796875, + "step": 2999 + }, + { + "epoch": 0.2027849128024875, + "grad_norm": 0.6957042217254639, + "learning_rate": 2.7810846642822466e-05, + "loss": 0.1475677490234375, + "step": 3000 + }, + { + "epoch": 0.20285250777342165, + "grad_norm": 0.2544870674610138, + "learning_rate": 2.7809129982159517e-05, + "loss": 0.043384552001953125, + "step": 3001 + }, + { + "epoch": 0.20292010274435582, + "grad_norm": 0.2618351876735687, + "learning_rate": 2.78074127017112e-05, + "loss": 0.0498199462890625, + "step": 3002 + }, + { + "epoch": 0.20298769771529, + "grad_norm": 1.336301326751709, + "learning_rate": 2.7805694801560608e-05, + "loss": 0.224822998046875, + "step": 3003 + }, + { + "epoch": 0.20305529268622413, + "grad_norm": 0.5492291450500488, + "learning_rate": 2.7803976281790864e-05, + "loss": 0.112518310546875, + "step": 3004 + }, + { + "epoch": 0.2031228876571583, + "grad_norm": 1.2397634983062744, + "learning_rate": 2.780225714248512e-05, + "loss": 0.241119384765625, + "step": 3005 + }, + { + "epoch": 0.20319048262809247, + "grad_norm": 0.8259292840957642, + "learning_rate": 2.7800537383726563e-05, + "loss": 0.07752609252929688, + "step": 3006 + }, + { + "epoch": 0.20325807759902664, + "grad_norm": 0.946738064289093, + "learning_rate": 2.77988170055984e-05, + "loss": 0.19342041015625, + "step": 3007 + }, + { + "epoch": 0.20332567256996079, + "grad_norm": 0.5502347946166992, + "learning_rate": 2.7797096008183874e-05, + "loss": 0.04302215576171875, + "step": 3008 + }, + { + "epoch": 0.20339326754089496, + "grad_norm": 1.4875847101211548, + "learning_rate": 2.779537439156626e-05, + "loss": 0.18658447265625, + "step": 3009 + }, + { + "epoch": 0.20346086251182913, + "grad_norm": 0.2942725718021393, + "learning_rate": 2.7793652155828858e-05, + "loss": 0.055377960205078125, + "step": 3010 + }, + { + "epoch": 0.20352845748276327, + "grad_norm": 1.4463887214660645, + "learning_rate": 2.7791929301054998e-05, + "loss": 0.26849365234375, + "step": 3011 + }, + { + "epoch": 0.20359605245369744, + "grad_norm": 0.757409930229187, + "learning_rate": 2.779020582732805e-05, + "loss": 0.142791748046875, + "step": 3012 + }, + { + "epoch": 0.2036636474246316, + "grad_norm": 0.6494478583335876, + "learning_rate": 2.7788481734731403e-05, + "loss": 0.1309356689453125, + "step": 3013 + }, + { + "epoch": 0.20373124239556578, + "grad_norm": 0.4512355923652649, + "learning_rate": 2.778675702334848e-05, + "loss": 0.06727409362792969, + "step": 3014 + }, + { + "epoch": 0.20379883736649992, + "grad_norm": 0.4157246947288513, + "learning_rate": 2.7785031693262732e-05, + "loss": 0.05713653564453125, + "step": 3015 + }, + { + "epoch": 0.2038664323374341, + "grad_norm": 0.5034378170967102, + "learning_rate": 2.778330574455764e-05, + "loss": 0.06779098510742188, + "step": 3016 + }, + { + "epoch": 0.20393402730836827, + "grad_norm": 0.6372075080871582, + "learning_rate": 2.778157917731672e-05, + "loss": 0.12249755859375, + "step": 3017 + }, + { + "epoch": 0.2040016222793024, + "grad_norm": 0.31506073474884033, + "learning_rate": 2.777985199162351e-05, + "loss": 0.04343414306640625, + "step": 3018 + }, + { + "epoch": 0.20406921725023658, + "grad_norm": 0.7907793521881104, + "learning_rate": 2.777812418756158e-05, + "loss": 0.140777587890625, + "step": 3019 + }, + { + "epoch": 0.20413681222117075, + "grad_norm": 0.8498079180717468, + "learning_rate": 2.777639576521454e-05, + "loss": 0.09905624389648438, + "step": 3020 + }, + { + "epoch": 0.20420440719210492, + "grad_norm": 1.6313084363937378, + "learning_rate": 2.7774666724666015e-05, + "loss": 0.25518798828125, + "step": 3021 + }, + { + "epoch": 0.20427200216303906, + "grad_norm": 1.1905598640441895, + "learning_rate": 2.777293706599967e-05, + "loss": 0.16870880126953125, + "step": 3022 + }, + { + "epoch": 0.20433959713397323, + "grad_norm": 0.8576928973197937, + "learning_rate": 2.7771206789299198e-05, + "loss": 0.1389312744140625, + "step": 3023 + }, + { + "epoch": 0.2044071921049074, + "grad_norm": 0.44417455792427063, + "learning_rate": 2.7769475894648315e-05, + "loss": 0.0534210205078125, + "step": 3024 + }, + { + "epoch": 0.20447478707584155, + "grad_norm": 0.8229403495788574, + "learning_rate": 2.7767744382130775e-05, + "loss": 0.208160400390625, + "step": 3025 + }, + { + "epoch": 0.20454238204677572, + "grad_norm": 0.312174916267395, + "learning_rate": 2.7766012251830364e-05, + "loss": 0.057987213134765625, + "step": 3026 + }, + { + "epoch": 0.2046099770177099, + "grad_norm": 0.992835521697998, + "learning_rate": 2.7764279503830888e-05, + "loss": 0.23638916015625, + "step": 3027 + }, + { + "epoch": 0.20467757198864406, + "grad_norm": 0.2694757282733917, + "learning_rate": 2.7762546138216187e-05, + "loss": 0.06156158447265625, + "step": 3028 + }, + { + "epoch": 0.2047451669595782, + "grad_norm": 0.7404396533966064, + "learning_rate": 2.7760812155070136e-05, + "loss": 0.175933837890625, + "step": 3029 + }, + { + "epoch": 0.20481276193051237, + "grad_norm": 0.6712900996208191, + "learning_rate": 2.7759077554476634e-05, + "loss": 0.173583984375, + "step": 3030 + }, + { + "epoch": 0.20488035690144654, + "grad_norm": 0.5987997055053711, + "learning_rate": 2.7757342336519615e-05, + "loss": 0.0773162841796875, + "step": 3031 + }, + { + "epoch": 0.20494795187238068, + "grad_norm": 0.9690448641777039, + "learning_rate": 2.775560650128303e-05, + "loss": 0.2366790771484375, + "step": 3032 + }, + { + "epoch": 0.20501554684331486, + "grad_norm": 1.2450374364852905, + "learning_rate": 2.775387004885088e-05, + "loss": 0.1858367919921875, + "step": 3033 + }, + { + "epoch": 0.20508314181424903, + "grad_norm": 0.30374446511268616, + "learning_rate": 2.775213297930718e-05, + "loss": 0.0647735595703125, + "step": 3034 + }, + { + "epoch": 0.20515073678518317, + "grad_norm": 0.431862473487854, + "learning_rate": 2.7750395292735987e-05, + "loss": 0.1098480224609375, + "step": 3035 + }, + { + "epoch": 0.20521833175611734, + "grad_norm": 0.2794783115386963, + "learning_rate": 2.774865698922137e-05, + "loss": 0.05035400390625, + "step": 3036 + }, + { + "epoch": 0.2052859267270515, + "grad_norm": 0.9734418392181396, + "learning_rate": 2.7746918068847447e-05, + "loss": 0.1445159912109375, + "step": 3037 + }, + { + "epoch": 0.20535352169798568, + "grad_norm": 0.5849259495735168, + "learning_rate": 2.7745178531698352e-05, + "loss": 0.10771560668945312, + "step": 3038 + }, + { + "epoch": 0.20542111666891982, + "grad_norm": 0.3776237368583679, + "learning_rate": 2.7743438377858267e-05, + "loss": 0.050506591796875, + "step": 3039 + }, + { + "epoch": 0.205488711639854, + "grad_norm": 0.9268108010292053, + "learning_rate": 2.7741697607411376e-05, + "loss": 0.14336395263671875, + "step": 3040 + }, + { + "epoch": 0.20555630661078816, + "grad_norm": 0.38374602794647217, + "learning_rate": 2.773995622044192e-05, + "loss": 0.055736541748046875, + "step": 3041 + }, + { + "epoch": 0.2056239015817223, + "grad_norm": 1.1704096794128418, + "learning_rate": 2.7738214217034147e-05, + "loss": 0.168182373046875, + "step": 3042 + }, + { + "epoch": 0.20569149655265648, + "grad_norm": 2.667861223220825, + "learning_rate": 2.7736471597272355e-05, + "loss": 0.27587890625, + "step": 3043 + }, + { + "epoch": 0.20575909152359065, + "grad_norm": 0.46222153306007385, + "learning_rate": 2.7734728361240857e-05, + "loss": 0.091522216796875, + "step": 3044 + }, + { + "epoch": 0.20582668649452482, + "grad_norm": 1.2332602739334106, + "learning_rate": 2.773298450902401e-05, + "loss": 0.21490478515625, + "step": 3045 + }, + { + "epoch": 0.20589428146545896, + "grad_norm": 0.5949699282646179, + "learning_rate": 2.7731240040706183e-05, + "loss": 0.07930374145507812, + "step": 3046 + }, + { + "epoch": 0.20596187643639313, + "grad_norm": 0.23430156707763672, + "learning_rate": 2.7729494956371792e-05, + "loss": 0.0367889404296875, + "step": 3047 + }, + { + "epoch": 0.2060294714073273, + "grad_norm": 0.9198998808860779, + "learning_rate": 2.7727749256105266e-05, + "loss": 0.18328857421875, + "step": 3048 + }, + { + "epoch": 0.20609706637826145, + "grad_norm": 0.7003703117370605, + "learning_rate": 2.7726002939991084e-05, + "loss": 0.07379150390625, + "step": 3049 + }, + { + "epoch": 0.20616466134919562, + "grad_norm": 1.4293302297592163, + "learning_rate": 2.7724256008113733e-05, + "loss": 0.13533782958984375, + "step": 3050 + }, + { + "epoch": 0.2062322563201298, + "grad_norm": 0.2214641571044922, + "learning_rate": 2.7722508460557742e-05, + "loss": 0.036075592041015625, + "step": 3051 + }, + { + "epoch": 0.20629985129106396, + "grad_norm": 0.45479142665863037, + "learning_rate": 2.7720760297407675e-05, + "loss": 0.0843048095703125, + "step": 3052 + }, + { + "epoch": 0.2063674462619981, + "grad_norm": 1.456896424293518, + "learning_rate": 2.7719011518748117e-05, + "loss": 0.194793701171875, + "step": 3053 + }, + { + "epoch": 0.20643504123293227, + "grad_norm": 0.831071138381958, + "learning_rate": 2.771726212466368e-05, + "loss": 0.11150360107421875, + "step": 3054 + }, + { + "epoch": 0.20650263620386644, + "grad_norm": 0.8292427062988281, + "learning_rate": 2.7715512115239014e-05, + "loss": 0.14299774169921875, + "step": 3055 + }, + { + "epoch": 0.20657023117480058, + "grad_norm": 0.26023685932159424, + "learning_rate": 2.7713761490558798e-05, + "loss": 0.047313690185546875, + "step": 3056 + }, + { + "epoch": 0.20663782614573475, + "grad_norm": 0.8187421560287476, + "learning_rate": 2.7712010250707735e-05, + "loss": 0.1132659912109375, + "step": 3057 + }, + { + "epoch": 0.20670542111666892, + "grad_norm": 0.6053853034973145, + "learning_rate": 2.771025839577056e-05, + "loss": 0.182769775390625, + "step": 3058 + }, + { + "epoch": 0.2067730160876031, + "grad_norm": 0.6703214645385742, + "learning_rate": 2.7708505925832034e-05, + "loss": 0.1629791259765625, + "step": 3059 + }, + { + "epoch": 0.20684061105853724, + "grad_norm": 0.6005720496177673, + "learning_rate": 2.7706752840976966e-05, + "loss": 0.11951446533203125, + "step": 3060 + }, + { + "epoch": 0.2069082060294714, + "grad_norm": 0.8191282153129578, + "learning_rate": 2.770499914129017e-05, + "loss": 0.133514404296875, + "step": 3061 + }, + { + "epoch": 0.20697580100040558, + "grad_norm": 1.0136308670043945, + "learning_rate": 2.7703244826856504e-05, + "loss": 0.160888671875, + "step": 3062 + }, + { + "epoch": 0.20704339597133972, + "grad_norm": 0.5477318167686462, + "learning_rate": 2.7701489897760848e-05, + "loss": 0.07391357421875, + "step": 3063 + }, + { + "epoch": 0.2071109909422739, + "grad_norm": 0.8763701319694519, + "learning_rate": 2.7699734354088125e-05, + "loss": 0.1223297119140625, + "step": 3064 + }, + { + "epoch": 0.20717858591320806, + "grad_norm": 0.4565682113170624, + "learning_rate": 2.7697978195923276e-05, + "loss": 0.099853515625, + "step": 3065 + }, + { + "epoch": 0.2072461808841422, + "grad_norm": 0.4239516258239746, + "learning_rate": 2.7696221423351277e-05, + "loss": 0.094085693359375, + "step": 3066 + }, + { + "epoch": 0.20731377585507638, + "grad_norm": 0.3965635895729065, + "learning_rate": 2.769446403645712e-05, + "loss": 0.06089019775390625, + "step": 3067 + }, + { + "epoch": 0.20738137082601055, + "grad_norm": 0.2502635419368744, + "learning_rate": 2.7692706035325854e-05, + "loss": 0.04921722412109375, + "step": 3068 + }, + { + "epoch": 0.20744896579694472, + "grad_norm": 1.7404993772506714, + "learning_rate": 2.7690947420042535e-05, + "loss": 0.14337921142578125, + "step": 3069 + }, + { + "epoch": 0.20751656076787886, + "grad_norm": 1.0814619064331055, + "learning_rate": 2.7689188190692253e-05, + "loss": 0.187713623046875, + "step": 3070 + }, + { + "epoch": 0.20758415573881303, + "grad_norm": 0.8663774728775024, + "learning_rate": 2.768742834736013e-05, + "loss": 0.13788604736328125, + "step": 3071 + }, + { + "epoch": 0.2076517507097472, + "grad_norm": 0.3168509602546692, + "learning_rate": 2.768566789013133e-05, + "loss": 0.051509857177734375, + "step": 3072 + }, + { + "epoch": 0.20771934568068134, + "grad_norm": 0.8974269032478333, + "learning_rate": 2.7683906819091022e-05, + "loss": 0.233306884765625, + "step": 3073 + }, + { + "epoch": 0.20778694065161551, + "grad_norm": 1.0531790256500244, + "learning_rate": 2.7682145134324416e-05, + "loss": 0.1613025665283203, + "step": 3074 + }, + { + "epoch": 0.20785453562254969, + "grad_norm": 0.7660130858421326, + "learning_rate": 2.7680382835916766e-05, + "loss": 0.1453857421875, + "step": 3075 + }, + { + "epoch": 0.20792213059348386, + "grad_norm": 0.6183528900146484, + "learning_rate": 2.7678619923953336e-05, + "loss": 0.084014892578125, + "step": 3076 + }, + { + "epoch": 0.207989725564418, + "grad_norm": 0.3975472152233124, + "learning_rate": 2.7676856398519422e-05, + "loss": 0.04066276550292969, + "step": 3077 + }, + { + "epoch": 0.20805732053535217, + "grad_norm": 1.123818039894104, + "learning_rate": 2.767509225970036e-05, + "loss": 0.2026824951171875, + "step": 3078 + }, + { + "epoch": 0.20812491550628634, + "grad_norm": 0.874866247177124, + "learning_rate": 2.767332750758151e-05, + "loss": 0.176666259765625, + "step": 3079 + }, + { + "epoch": 0.20819251047722048, + "grad_norm": 0.9846891164779663, + "learning_rate": 2.767156214224826e-05, + "loss": 0.10157394409179688, + "step": 3080 + }, + { + "epoch": 0.20826010544815465, + "grad_norm": 0.47767794132232666, + "learning_rate": 2.766979616378603e-05, + "loss": 0.092529296875, + "step": 3081 + }, + { + "epoch": 0.20832770041908882, + "grad_norm": 0.5608116388320923, + "learning_rate": 2.766802957228027e-05, + "loss": 0.1360626220703125, + "step": 3082 + }, + { + "epoch": 0.208395295390023, + "grad_norm": 0.3839634954929352, + "learning_rate": 2.7666262367816457e-05, + "loss": 0.0685272216796875, + "step": 3083 + }, + { + "epoch": 0.20846289036095714, + "grad_norm": 2.0590150356292725, + "learning_rate": 2.7664494550480098e-05, + "loss": 0.30224609375, + "step": 3084 + }, + { + "epoch": 0.2085304853318913, + "grad_norm": 0.8122022151947021, + "learning_rate": 2.766272612035674e-05, + "loss": 0.140350341796875, + "step": 3085 + }, + { + "epoch": 0.20859808030282548, + "grad_norm": 1.3061010837554932, + "learning_rate": 2.766095707753194e-05, + "loss": 0.2271728515625, + "step": 3086 + }, + { + "epoch": 0.20866567527375962, + "grad_norm": 0.8713530898094177, + "learning_rate": 2.7659187422091295e-05, + "loss": 0.087615966796875, + "step": 3087 + }, + { + "epoch": 0.2087332702446938, + "grad_norm": 0.6819984316825867, + "learning_rate": 2.765741715412044e-05, + "loss": 0.099822998046875, + "step": 3088 + }, + { + "epoch": 0.20880086521562796, + "grad_norm": 0.33900168538093567, + "learning_rate": 2.7655646273705025e-05, + "loss": 0.047634124755859375, + "step": 3089 + }, + { + "epoch": 0.20886846018656213, + "grad_norm": 0.9042877554893494, + "learning_rate": 2.765387478093074e-05, + "loss": 0.232666015625, + "step": 3090 + }, + { + "epoch": 0.20893605515749628, + "grad_norm": 0.765669047832489, + "learning_rate": 2.76521026758833e-05, + "loss": 0.11089324951171875, + "step": 3091 + }, + { + "epoch": 0.20900365012843045, + "grad_norm": 0.9846020936965942, + "learning_rate": 2.7650329958648452e-05, + "loss": 0.12833404541015625, + "step": 3092 + }, + { + "epoch": 0.20907124509936462, + "grad_norm": 1.793933629989624, + "learning_rate": 2.7648556629311962e-05, + "loss": 0.13390350341796875, + "step": 3093 + }, + { + "epoch": 0.20913884007029876, + "grad_norm": 0.7248508334159851, + "learning_rate": 2.7646782687959647e-05, + "loss": 0.09145355224609375, + "step": 3094 + }, + { + "epoch": 0.20920643504123293, + "grad_norm": 0.8951168656349182, + "learning_rate": 2.7645008134677336e-05, + "loss": 0.13988494873046875, + "step": 3095 + }, + { + "epoch": 0.2092740300121671, + "grad_norm": 0.34026312828063965, + "learning_rate": 2.764323296955089e-05, + "loss": 0.06391143798828125, + "step": 3096 + }, + { + "epoch": 0.20934162498310127, + "grad_norm": 0.8083912134170532, + "learning_rate": 2.764145719266621e-05, + "loss": 0.11923980712890625, + "step": 3097 + }, + { + "epoch": 0.2094092199540354, + "grad_norm": 0.9400172829627991, + "learning_rate": 2.763968080410921e-05, + "loss": 0.183258056640625, + "step": 3098 + }, + { + "epoch": 0.20947681492496958, + "grad_norm": 0.47758427262306213, + "learning_rate": 2.7637903803965852e-05, + "loss": 0.0660400390625, + "step": 3099 + }, + { + "epoch": 0.20954440989590375, + "grad_norm": 0.9750581383705139, + "learning_rate": 2.7636126192322113e-05, + "loss": 0.1916656494140625, + "step": 3100 + }, + { + "epoch": 0.2096120048668379, + "grad_norm": 0.5646058917045593, + "learning_rate": 2.7634347969264002e-05, + "loss": 0.12129974365234375, + "step": 3101 + }, + { + "epoch": 0.20967959983777207, + "grad_norm": 1.3319616317749023, + "learning_rate": 2.7632569134877568e-05, + "loss": 0.18955230712890625, + "step": 3102 + }, + { + "epoch": 0.20974719480870624, + "grad_norm": 0.8920730352401733, + "learning_rate": 2.7630789689248873e-05, + "loss": 0.2017822265625, + "step": 3103 + }, + { + "epoch": 0.20981478977964038, + "grad_norm": 0.6111076474189758, + "learning_rate": 2.762900963246403e-05, + "loss": 0.09336090087890625, + "step": 3104 + }, + { + "epoch": 0.20988238475057455, + "grad_norm": 1.4482414722442627, + "learning_rate": 2.7627228964609156e-05, + "loss": 0.197662353515625, + "step": 3105 + }, + { + "epoch": 0.20994997972150872, + "grad_norm": 0.5529834032058716, + "learning_rate": 2.7625447685770417e-05, + "loss": 0.08300018310546875, + "step": 3106 + }, + { + "epoch": 0.2100175746924429, + "grad_norm": 0.6697968244552612, + "learning_rate": 2.7623665796034004e-05, + "loss": 0.092254638671875, + "step": 3107 + }, + { + "epoch": 0.21008516966337704, + "grad_norm": 0.9286326766014099, + "learning_rate": 2.7621883295486137e-05, + "loss": 0.1174774169921875, + "step": 3108 + }, + { + "epoch": 0.2101527646343112, + "grad_norm": 0.7276982069015503, + "learning_rate": 2.7620100184213054e-05, + "loss": 0.1413726806640625, + "step": 3109 + }, + { + "epoch": 0.21022035960524538, + "grad_norm": 0.6352155804634094, + "learning_rate": 2.761831646230105e-05, + "loss": 0.12865447998046875, + "step": 3110 + }, + { + "epoch": 0.21028795457617952, + "grad_norm": 0.3437652587890625, + "learning_rate": 2.7616532129836415e-05, + "loss": 0.05533599853515625, + "step": 3111 + }, + { + "epoch": 0.2103555495471137, + "grad_norm": 0.22459061443805695, + "learning_rate": 2.7614747186905498e-05, + "loss": 0.03847503662109375, + "step": 3112 + }, + { + "epoch": 0.21042314451804786, + "grad_norm": 1.1483274698257446, + "learning_rate": 2.7612961633594663e-05, + "loss": 0.1523284912109375, + "step": 3113 + }, + { + "epoch": 0.21049073948898203, + "grad_norm": 0.907663106918335, + "learning_rate": 2.76111754699903e-05, + "loss": 0.11053466796875, + "step": 3114 + }, + { + "epoch": 0.21055833445991617, + "grad_norm": 1.1304659843444824, + "learning_rate": 2.7609388696178843e-05, + "loss": 0.2034912109375, + "step": 3115 + }, + { + "epoch": 0.21062592943085034, + "grad_norm": 2.711954355239868, + "learning_rate": 2.7607601312246745e-05, + "loss": 0.2481689453125, + "step": 3116 + }, + { + "epoch": 0.21069352440178452, + "grad_norm": 0.3547629415988922, + "learning_rate": 2.760581331828049e-05, + "loss": 0.05045318603515625, + "step": 3117 + }, + { + "epoch": 0.21076111937271866, + "grad_norm": 1.4384208917617798, + "learning_rate": 2.760402471436659e-05, + "loss": 0.160858154296875, + "step": 3118 + }, + { + "epoch": 0.21082871434365283, + "grad_norm": 0.9969213008880615, + "learning_rate": 2.760223550059159e-05, + "loss": 0.12308502197265625, + "step": 3119 + }, + { + "epoch": 0.210896309314587, + "grad_norm": 0.9286792874336243, + "learning_rate": 2.760044567704206e-05, + "loss": 0.146392822265625, + "step": 3120 + }, + { + "epoch": 0.21096390428552117, + "grad_norm": 0.8139071464538574, + "learning_rate": 2.7598655243804618e-05, + "loss": 0.1134033203125, + "step": 3121 + }, + { + "epoch": 0.2110314992564553, + "grad_norm": 0.5304183959960938, + "learning_rate": 2.7596864200965873e-05, + "loss": 0.10020065307617188, + "step": 3122 + }, + { + "epoch": 0.21109909422738948, + "grad_norm": 0.24229669570922852, + "learning_rate": 2.759507254861251e-05, + "loss": 0.04972076416015625, + "step": 3123 + }, + { + "epoch": 0.21116668919832365, + "grad_norm": 0.7082515954971313, + "learning_rate": 2.7593280286831205e-05, + "loss": 0.13346099853515625, + "step": 3124 + }, + { + "epoch": 0.2112342841692578, + "grad_norm": 0.516963541507721, + "learning_rate": 2.759148741570868e-05, + "loss": 0.0844879150390625, + "step": 3125 + }, + { + "epoch": 0.21130187914019197, + "grad_norm": 0.5983502864837646, + "learning_rate": 2.758969393533169e-05, + "loss": 0.0726165771484375, + "step": 3126 + }, + { + "epoch": 0.21136947411112614, + "grad_norm": 0.7737400531768799, + "learning_rate": 2.7587899845787014e-05, + "loss": 0.129241943359375, + "step": 3127 + }, + { + "epoch": 0.2114370690820603, + "grad_norm": 0.5395578742027283, + "learning_rate": 2.7586105147161458e-05, + "loss": 0.067169189453125, + "step": 3128 + }, + { + "epoch": 0.21150466405299445, + "grad_norm": 0.3719494044780731, + "learning_rate": 2.7584309839541867e-05, + "loss": 0.07862091064453125, + "step": 3129 + }, + { + "epoch": 0.21157225902392862, + "grad_norm": 1.2288938760757446, + "learning_rate": 2.75825139230151e-05, + "loss": 0.210357666015625, + "step": 3130 + }, + { + "epoch": 0.2116398539948628, + "grad_norm": 1.4878343343734741, + "learning_rate": 2.7580717397668065e-05, + "loss": 0.12894439697265625, + "step": 3131 + }, + { + "epoch": 0.21170744896579693, + "grad_norm": 0.2949409484863281, + "learning_rate": 2.7578920263587677e-05, + "loss": 0.04632568359375, + "step": 3132 + }, + { + "epoch": 0.2117750439367311, + "grad_norm": 1.7092210054397583, + "learning_rate": 2.7577122520860906e-05, + "loss": 0.288848876953125, + "step": 3133 + }, + { + "epoch": 0.21184263890766528, + "grad_norm": 0.4095827639102936, + "learning_rate": 2.7575324169574725e-05, + "loss": 0.105499267578125, + "step": 3134 + }, + { + "epoch": 0.21191023387859942, + "grad_norm": 1.2524043321609497, + "learning_rate": 2.757352520981616e-05, + "loss": 0.1712646484375, + "step": 3135 + }, + { + "epoch": 0.2119778288495336, + "grad_norm": 0.48231634497642517, + "learning_rate": 2.757172564167225e-05, + "loss": 0.04926300048828125, + "step": 3136 + }, + { + "epoch": 0.21204542382046776, + "grad_norm": 1.1232998371124268, + "learning_rate": 2.7569925465230068e-05, + "loss": 0.251495361328125, + "step": 3137 + }, + { + "epoch": 0.21211301879140193, + "grad_norm": 0.33239269256591797, + "learning_rate": 2.7568124680576726e-05, + "loss": 0.032459259033203125, + "step": 3138 + }, + { + "epoch": 0.21218061376233607, + "grad_norm": 0.6888588070869446, + "learning_rate": 2.7566323287799346e-05, + "loss": 0.148834228515625, + "step": 3139 + }, + { + "epoch": 0.21224820873327024, + "grad_norm": 0.8157955408096313, + "learning_rate": 2.75645212869851e-05, + "loss": 0.1157379150390625, + "step": 3140 + }, + { + "epoch": 0.21231580370420441, + "grad_norm": 0.6640103459358215, + "learning_rate": 2.7562718678221177e-05, + "loss": 0.1172637939453125, + "step": 3141 + }, + { + "epoch": 0.21238339867513856, + "grad_norm": 0.5981087684631348, + "learning_rate": 2.7560915461594792e-05, + "loss": 0.11234283447265625, + "step": 3142 + }, + { + "epoch": 0.21245099364607273, + "grad_norm": 0.46900010108947754, + "learning_rate": 2.7559111637193205e-05, + "loss": 0.1136474609375, + "step": 3143 + }, + { + "epoch": 0.2125185886170069, + "grad_norm": 0.2438773661851883, + "learning_rate": 2.7557307205103692e-05, + "loss": 0.0450592041015625, + "step": 3144 + }, + { + "epoch": 0.21258618358794107, + "grad_norm": 0.7878502607345581, + "learning_rate": 2.7555502165413567e-05, + "loss": 0.1309661865234375, + "step": 3145 + }, + { + "epoch": 0.2126537785588752, + "grad_norm": 0.5734158158302307, + "learning_rate": 2.7553696518210165e-05, + "loss": 0.116729736328125, + "step": 3146 + }, + { + "epoch": 0.21272137352980938, + "grad_norm": 1.7715977430343628, + "learning_rate": 2.7551890263580853e-05, + "loss": 0.26416015625, + "step": 3147 + }, + { + "epoch": 0.21278896850074355, + "grad_norm": 0.6266242265701294, + "learning_rate": 2.7550083401613028e-05, + "loss": 0.10570144653320312, + "step": 3148 + }, + { + "epoch": 0.2128565634716777, + "grad_norm": 1.1833409070968628, + "learning_rate": 2.7548275932394122e-05, + "loss": 0.20808029174804688, + "step": 3149 + }, + { + "epoch": 0.21292415844261187, + "grad_norm": 0.6154717206954956, + "learning_rate": 2.754646785601159e-05, + "loss": 0.17437744140625, + "step": 3150 + }, + { + "epoch": 0.21299175341354604, + "grad_norm": 0.4435068368911743, + "learning_rate": 2.7544659172552912e-05, + "loss": 0.06331634521484375, + "step": 3151 + }, + { + "epoch": 0.2130593483844802, + "grad_norm": 0.7025219798088074, + "learning_rate": 2.7542849882105618e-05, + "loss": 0.14013671875, + "step": 3152 + }, + { + "epoch": 0.21312694335541435, + "grad_norm": 0.7595950365066528, + "learning_rate": 2.7541039984757238e-05, + "loss": 0.09394073486328125, + "step": 3153 + }, + { + "epoch": 0.21319453832634852, + "grad_norm": 0.4861356317996979, + "learning_rate": 2.7539229480595356e-05, + "loss": 0.0548248291015625, + "step": 3154 + }, + { + "epoch": 0.2132621332972827, + "grad_norm": 0.7316564321517944, + "learning_rate": 2.7537418369707568e-05, + "loss": 0.0686187744140625, + "step": 3155 + }, + { + "epoch": 0.21332972826821683, + "grad_norm": 0.36702728271484375, + "learning_rate": 2.753560665218151e-05, + "loss": 0.0590972900390625, + "step": 3156 + }, + { + "epoch": 0.213397323239151, + "grad_norm": 0.35606086254119873, + "learning_rate": 2.7533794328104844e-05, + "loss": 0.06417083740234375, + "step": 3157 + }, + { + "epoch": 0.21346491821008517, + "grad_norm": 0.6078017950057983, + "learning_rate": 2.753198139756526e-05, + "loss": 0.0979766845703125, + "step": 3158 + }, + { + "epoch": 0.21353251318101935, + "grad_norm": 0.2555921971797943, + "learning_rate": 2.753016786065048e-05, + "loss": 0.03478050231933594, + "step": 3159 + }, + { + "epoch": 0.2136001081519535, + "grad_norm": 0.8321879506111145, + "learning_rate": 2.7528353717448257e-05, + "loss": 0.16510009765625, + "step": 3160 + }, + { + "epoch": 0.21366770312288766, + "grad_norm": 0.9191932678222656, + "learning_rate": 2.752653896804637e-05, + "loss": 0.19317626953125, + "step": 3161 + }, + { + "epoch": 0.21373529809382183, + "grad_norm": 0.8466549515724182, + "learning_rate": 2.7524723612532625e-05, + "loss": 0.25250244140625, + "step": 3162 + }, + { + "epoch": 0.21380289306475597, + "grad_norm": 0.5133512616157532, + "learning_rate": 2.752290765099486e-05, + "loss": 0.094482421875, + "step": 3163 + }, + { + "epoch": 0.21387048803569014, + "grad_norm": 1.1379319429397583, + "learning_rate": 2.7521091083520944e-05, + "loss": 0.154541015625, + "step": 3164 + }, + { + "epoch": 0.2139380830066243, + "grad_norm": 0.37946024537086487, + "learning_rate": 2.7519273910198775e-05, + "loss": 0.065826416015625, + "step": 3165 + }, + { + "epoch": 0.21400567797755846, + "grad_norm": 0.9537404775619507, + "learning_rate": 2.7517456131116274e-05, + "loss": 0.153717041015625, + "step": 3166 + }, + { + "epoch": 0.21407327294849263, + "grad_norm": 1.1986310482025146, + "learning_rate": 2.75156377463614e-05, + "loss": 0.235504150390625, + "step": 3167 + }, + { + "epoch": 0.2141408679194268, + "grad_norm": 1.072627067565918, + "learning_rate": 2.7513818756022144e-05, + "loss": 0.201507568359375, + "step": 3168 + }, + { + "epoch": 0.21420846289036097, + "grad_norm": 1.3419803380966187, + "learning_rate": 2.7511999160186508e-05, + "loss": 0.1547088623046875, + "step": 3169 + }, + { + "epoch": 0.2142760578612951, + "grad_norm": 0.4552232027053833, + "learning_rate": 2.7510178958942543e-05, + "loss": 0.06064605712890625, + "step": 3170 + }, + { + "epoch": 0.21434365283222928, + "grad_norm": 0.2584376633167267, + "learning_rate": 2.7508358152378324e-05, + "loss": 0.04681396484375, + "step": 3171 + }, + { + "epoch": 0.21441124780316345, + "grad_norm": 0.8726046085357666, + "learning_rate": 2.7506536740581943e-05, + "loss": 0.14867782592773438, + "step": 3172 + }, + { + "epoch": 0.2144788427740976, + "grad_norm": 1.3674088716506958, + "learning_rate": 2.750471472364154e-05, + "loss": 0.1619873046875, + "step": 3173 + }, + { + "epoch": 0.21454643774503177, + "grad_norm": 0.7352094650268555, + "learning_rate": 2.750289210164527e-05, + "loss": 0.139556884765625, + "step": 3174 + }, + { + "epoch": 0.21461403271596594, + "grad_norm": 0.41073426604270935, + "learning_rate": 2.7501068874681328e-05, + "loss": 0.08380889892578125, + "step": 3175 + }, + { + "epoch": 0.2146816276869001, + "grad_norm": 1.6401020288467407, + "learning_rate": 2.749924504283793e-05, + "loss": 0.1549224853515625, + "step": 3176 + }, + { + "epoch": 0.21474922265783425, + "grad_norm": 0.5570884943008423, + "learning_rate": 2.749742060620333e-05, + "loss": 0.1027984619140625, + "step": 3177 + }, + { + "epoch": 0.21481681762876842, + "grad_norm": 2.0862061977386475, + "learning_rate": 2.7495595564865796e-05, + "loss": 0.265594482421875, + "step": 3178 + }, + { + "epoch": 0.2148844125997026, + "grad_norm": 0.9218659996986389, + "learning_rate": 2.7493769918913633e-05, + "loss": 0.1150054931640625, + "step": 3179 + }, + { + "epoch": 0.21495200757063673, + "grad_norm": 0.7316518425941467, + "learning_rate": 2.749194366843519e-05, + "loss": 0.1596221923828125, + "step": 3180 + }, + { + "epoch": 0.2150196025415709, + "grad_norm": 0.3293622136116028, + "learning_rate": 2.749011681351883e-05, + "loss": 0.07053375244140625, + "step": 3181 + }, + { + "epoch": 0.21508719751250507, + "grad_norm": 0.2779940366744995, + "learning_rate": 2.7488289354252935e-05, + "loss": 0.0507965087890625, + "step": 3182 + }, + { + "epoch": 0.21515479248343924, + "grad_norm": 1.21385657787323, + "learning_rate": 2.748646129072594e-05, + "loss": 0.16252899169921875, + "step": 3183 + }, + { + "epoch": 0.2152223874543734, + "grad_norm": 0.18009355664253235, + "learning_rate": 2.7484632623026294e-05, + "loss": 0.02660369873046875, + "step": 3184 + }, + { + "epoch": 0.21528998242530756, + "grad_norm": 0.6117817163467407, + "learning_rate": 2.7482803351242488e-05, + "loss": 0.09088134765625, + "step": 3185 + }, + { + "epoch": 0.21535757739624173, + "grad_norm": 0.5407953858375549, + "learning_rate": 2.7480973475463018e-05, + "loss": 0.11444091796875, + "step": 3186 + }, + { + "epoch": 0.21542517236717587, + "grad_norm": 0.5972457528114319, + "learning_rate": 2.7479142995776436e-05, + "loss": 0.1246490478515625, + "step": 3187 + }, + { + "epoch": 0.21549276733811004, + "grad_norm": 0.793376088142395, + "learning_rate": 2.7477311912271312e-05, + "loss": 0.1283416748046875, + "step": 3188 + }, + { + "epoch": 0.2155603623090442, + "grad_norm": 0.5709262490272522, + "learning_rate": 2.7475480225036237e-05, + "loss": 0.1660614013671875, + "step": 3189 + }, + { + "epoch": 0.21562795727997838, + "grad_norm": 0.43444928526878357, + "learning_rate": 2.7473647934159843e-05, + "loss": 0.07768821716308594, + "step": 3190 + }, + { + "epoch": 0.21569555225091253, + "grad_norm": 0.6144503355026245, + "learning_rate": 2.7471815039730793e-05, + "loss": 0.14667510986328125, + "step": 3191 + }, + { + "epoch": 0.2157631472218467, + "grad_norm": 0.817584753036499, + "learning_rate": 2.746998154183777e-05, + "loss": 0.10164642333984375, + "step": 3192 + }, + { + "epoch": 0.21583074219278087, + "grad_norm": 0.3909587264060974, + "learning_rate": 2.7468147440569487e-05, + "loss": 0.0766448974609375, + "step": 3193 + }, + { + "epoch": 0.215898337163715, + "grad_norm": 0.5931364893913269, + "learning_rate": 2.7466312736014698e-05, + "loss": 0.1019744873046875, + "step": 3194 + }, + { + "epoch": 0.21596593213464918, + "grad_norm": 0.45730310678482056, + "learning_rate": 2.7464477428262164e-05, + "loss": 0.071136474609375, + "step": 3195 + }, + { + "epoch": 0.21603352710558335, + "grad_norm": 0.3642682731151581, + "learning_rate": 2.7462641517400703e-05, + "loss": 0.0811920166015625, + "step": 3196 + }, + { + "epoch": 0.2161011220765175, + "grad_norm": 0.4456660747528076, + "learning_rate": 2.7460805003519137e-05, + "loss": 0.086212158203125, + "step": 3197 + }, + { + "epoch": 0.21616871704745166, + "grad_norm": 0.27898433804512024, + "learning_rate": 2.7458967886706336e-05, + "loss": 0.039081573486328125, + "step": 3198 + }, + { + "epoch": 0.21623631201838583, + "grad_norm": 0.7542362213134766, + "learning_rate": 2.7457130167051184e-05, + "loss": 0.0908966064453125, + "step": 3199 + }, + { + "epoch": 0.21630390698932, + "grad_norm": 1.0101003646850586, + "learning_rate": 2.7455291844642607e-05, + "loss": 0.14471054077148438, + "step": 3200 + }, + { + "epoch": 0.21637150196025415, + "grad_norm": 0.196891650557518, + "learning_rate": 2.7453452919569548e-05, + "loss": 0.036678314208984375, + "step": 3201 + }, + { + "epoch": 0.21643909693118832, + "grad_norm": 0.6475880146026611, + "learning_rate": 2.745161339192099e-05, + "loss": 0.101043701171875, + "step": 3202 + }, + { + "epoch": 0.2165066919021225, + "grad_norm": 0.9270824193954468, + "learning_rate": 2.7449773261785943e-05, + "loss": 0.132080078125, + "step": 3203 + }, + { + "epoch": 0.21657428687305663, + "grad_norm": 0.8780146241188049, + "learning_rate": 2.744793252925344e-05, + "loss": 0.16773223876953125, + "step": 3204 + }, + { + "epoch": 0.2166418818439908, + "grad_norm": 0.7115905284881592, + "learning_rate": 2.744609119441255e-05, + "loss": 0.171966552734375, + "step": 3205 + }, + { + "epoch": 0.21670947681492497, + "grad_norm": 0.8410437703132629, + "learning_rate": 2.7444249257352368e-05, + "loss": 0.13289642333984375, + "step": 3206 + }, + { + "epoch": 0.21677707178585914, + "grad_norm": 0.8268284797668457, + "learning_rate": 2.744240671816201e-05, + "loss": 0.06552886962890625, + "step": 3207 + }, + { + "epoch": 0.2168446667567933, + "grad_norm": 0.581201434135437, + "learning_rate": 2.744056357693064e-05, + "loss": 0.10400390625, + "step": 3208 + }, + { + "epoch": 0.21691226172772746, + "grad_norm": 0.8303901553153992, + "learning_rate": 2.743871983374744e-05, + "loss": 0.12233734130859375, + "step": 3209 + }, + { + "epoch": 0.21697985669866163, + "grad_norm": 0.5163869261741638, + "learning_rate": 2.743687548870162e-05, + "loss": 0.0638275146484375, + "step": 3210 + }, + { + "epoch": 0.21704745166959577, + "grad_norm": 0.2709130048751831, + "learning_rate": 2.7435030541882412e-05, + "loss": 0.05831146240234375, + "step": 3211 + }, + { + "epoch": 0.21711504664052994, + "grad_norm": 1.2182918787002563, + "learning_rate": 2.74331849933791e-05, + "loss": 0.19134521484375, + "step": 3212 + }, + { + "epoch": 0.2171826416114641, + "grad_norm": 0.689172089099884, + "learning_rate": 2.7431338843280974e-05, + "loss": 0.0945281982421875, + "step": 3213 + }, + { + "epoch": 0.21725023658239828, + "grad_norm": 1.2617626190185547, + "learning_rate": 2.7429492091677365e-05, + "loss": 0.23272705078125, + "step": 3214 + }, + { + "epoch": 0.21731783155333242, + "grad_norm": 0.18015480041503906, + "learning_rate": 2.7427644738657635e-05, + "loss": 0.024095535278320312, + "step": 3215 + }, + { + "epoch": 0.2173854265242666, + "grad_norm": 0.9770106077194214, + "learning_rate": 2.742579678431116e-05, + "loss": 0.1317138671875, + "step": 3216 + }, + { + "epoch": 0.21745302149520077, + "grad_norm": 0.7151167988777161, + "learning_rate": 2.742394822872736e-05, + "loss": 0.1272125244140625, + "step": 3217 + }, + { + "epoch": 0.2175206164661349, + "grad_norm": 0.9526801705360413, + "learning_rate": 2.742209907199569e-05, + "loss": 0.1753997802734375, + "step": 3218 + }, + { + "epoch": 0.21758821143706908, + "grad_norm": 1.2193502187728882, + "learning_rate": 2.7420249314205608e-05, + "loss": 0.17139816284179688, + "step": 3219 + }, + { + "epoch": 0.21765580640800325, + "grad_norm": 0.5537486672401428, + "learning_rate": 2.7418398955446625e-05, + "loss": 0.09100341796875, + "step": 3220 + }, + { + "epoch": 0.21772340137893742, + "grad_norm": 0.6503932476043701, + "learning_rate": 2.741654799580827e-05, + "loss": 0.11693572998046875, + "step": 3221 + }, + { + "epoch": 0.21779099634987156, + "grad_norm": 0.4167648255825043, + "learning_rate": 2.7414696435380108e-05, + "loss": 0.0624237060546875, + "step": 3222 + }, + { + "epoch": 0.21785859132080573, + "grad_norm": 0.7789947986602783, + "learning_rate": 2.7412844274251724e-05, + "loss": 0.13387298583984375, + "step": 3223 + }, + { + "epoch": 0.2179261862917399, + "grad_norm": 0.9716550707817078, + "learning_rate": 2.741099151251274e-05, + "loss": 0.1912841796875, + "step": 3224 + }, + { + "epoch": 0.21799378126267405, + "grad_norm": 0.48010775446891785, + "learning_rate": 2.7409138150252805e-05, + "loss": 0.099609375, + "step": 3225 + }, + { + "epoch": 0.21806137623360822, + "grad_norm": 0.3528156280517578, + "learning_rate": 2.7407284187561593e-05, + "loss": 0.05382537841796875, + "step": 3226 + }, + { + "epoch": 0.2181289712045424, + "grad_norm": 0.25427570939064026, + "learning_rate": 2.7405429624528813e-05, + "loss": 0.04821014404296875, + "step": 3227 + }, + { + "epoch": 0.21819656617547656, + "grad_norm": 0.647977888584137, + "learning_rate": 2.74035744612442e-05, + "loss": 0.1184539794921875, + "step": 3228 + }, + { + "epoch": 0.2182641611464107, + "grad_norm": 0.48606184124946594, + "learning_rate": 2.7401718697797514e-05, + "loss": 0.116912841796875, + "step": 3229 + }, + { + "epoch": 0.21833175611734487, + "grad_norm": 0.8648349642753601, + "learning_rate": 2.7399862334278555e-05, + "loss": 0.22216796875, + "step": 3230 + }, + { + "epoch": 0.21839935108827904, + "grad_norm": 0.6337718963623047, + "learning_rate": 2.739800537077714e-05, + "loss": 0.07161712646484375, + "step": 3231 + }, + { + "epoch": 0.21846694605921319, + "grad_norm": 2.7913835048675537, + "learning_rate": 2.7396147807383127e-05, + "loss": 0.240570068359375, + "step": 3232 + }, + { + "epoch": 0.21853454103014736, + "grad_norm": 1.8561676740646362, + "learning_rate": 2.739428964418639e-05, + "loss": 0.225830078125, + "step": 3233 + }, + { + "epoch": 0.21860213600108153, + "grad_norm": 1.167678713798523, + "learning_rate": 2.739243088127684e-05, + "loss": 0.2332916259765625, + "step": 3234 + }, + { + "epoch": 0.21866973097201567, + "grad_norm": 0.19761356711387634, + "learning_rate": 2.7390571518744416e-05, + "loss": 0.0452880859375, + "step": 3235 + }, + { + "epoch": 0.21873732594294984, + "grad_norm": 0.5059375762939453, + "learning_rate": 2.7388711556679087e-05, + "loss": 0.145050048828125, + "step": 3236 + }, + { + "epoch": 0.218804920913884, + "grad_norm": 0.8670137524604797, + "learning_rate": 2.7386850995170848e-05, + "loss": 0.1681671142578125, + "step": 3237 + }, + { + "epoch": 0.21887251588481818, + "grad_norm": 0.5788934826850891, + "learning_rate": 2.7384989834309722e-05, + "loss": 0.1042327880859375, + "step": 3238 + }, + { + "epoch": 0.21894011085575232, + "grad_norm": 1.0474501848220825, + "learning_rate": 2.7383128074185767e-05, + "loss": 0.181060791015625, + "step": 3239 + }, + { + "epoch": 0.2190077058266865, + "grad_norm": 1.0818971395492554, + "learning_rate": 2.738126571488907e-05, + "loss": 0.1661376953125, + "step": 3240 + }, + { + "epoch": 0.21907530079762066, + "grad_norm": 0.8998450636863708, + "learning_rate": 2.737940275650974e-05, + "loss": 0.12725830078125, + "step": 3241 + }, + { + "epoch": 0.2191428957685548, + "grad_norm": 0.37565580010414124, + "learning_rate": 2.7377539199137914e-05, + "loss": 0.07727813720703125, + "step": 3242 + }, + { + "epoch": 0.21921049073948898, + "grad_norm": 0.37032583355903625, + "learning_rate": 2.7375675042863765e-05, + "loss": 0.07231330871582031, + "step": 3243 + }, + { + "epoch": 0.21927808571042315, + "grad_norm": 0.30072712898254395, + "learning_rate": 2.73738102877775e-05, + "loss": 0.05783843994140625, + "step": 3244 + }, + { + "epoch": 0.21934568068135732, + "grad_norm": 0.6533893942832947, + "learning_rate": 2.7371944933969334e-05, + "loss": 0.10088348388671875, + "step": 3245 + }, + { + "epoch": 0.21941327565229146, + "grad_norm": 0.22025452554225922, + "learning_rate": 2.737007898152954e-05, + "loss": 0.056041717529296875, + "step": 3246 + }, + { + "epoch": 0.21948087062322563, + "grad_norm": 3.0789339542388916, + "learning_rate": 2.7368212430548388e-05, + "loss": 0.27911376953125, + "step": 3247 + }, + { + "epoch": 0.2195484655941598, + "grad_norm": 0.69973224401474, + "learning_rate": 2.7366345281116207e-05, + "loss": 0.137725830078125, + "step": 3248 + }, + { + "epoch": 0.21961606056509395, + "grad_norm": 0.28480562567710876, + "learning_rate": 2.7364477533323337e-05, + "loss": 0.04144859313964844, + "step": 3249 + }, + { + "epoch": 0.21968365553602812, + "grad_norm": 1.1403411626815796, + "learning_rate": 2.7362609187260146e-05, + "loss": 0.213470458984375, + "step": 3250 + }, + { + "epoch": 0.2197512505069623, + "grad_norm": 1.173104166984558, + "learning_rate": 2.7360740243017042e-05, + "loss": 0.21319580078125, + "step": 3251 + }, + { + "epoch": 0.21981884547789646, + "grad_norm": 0.3439694941043854, + "learning_rate": 2.735887070068445e-05, + "loss": 0.03972625732421875, + "step": 3252 + }, + { + "epoch": 0.2198864404488306, + "grad_norm": 0.9071300029754639, + "learning_rate": 2.7357000560352843e-05, + "loss": 0.172637939453125, + "step": 3253 + }, + { + "epoch": 0.21995403541976477, + "grad_norm": 0.493401437997818, + "learning_rate": 2.7355129822112694e-05, + "loss": 0.07457733154296875, + "step": 3254 + }, + { + "epoch": 0.22002163039069894, + "grad_norm": 0.9977712035179138, + "learning_rate": 2.7353258486054532e-05, + "loss": 0.12067413330078125, + "step": 3255 + }, + { + "epoch": 0.22008922536163308, + "grad_norm": 0.2856846749782562, + "learning_rate": 2.73513865522689e-05, + "loss": 0.06885528564453125, + "step": 3256 + }, + { + "epoch": 0.22015682033256725, + "grad_norm": 0.7236219048500061, + "learning_rate": 2.7349514020846373e-05, + "loss": 0.1395263671875, + "step": 3257 + }, + { + "epoch": 0.22022441530350143, + "grad_norm": 0.22434081137180328, + "learning_rate": 2.734764089187756e-05, + "loss": 0.041049957275390625, + "step": 3258 + }, + { + "epoch": 0.2202920102744356, + "grad_norm": 0.2676725685596466, + "learning_rate": 2.734576716545309e-05, + "loss": 0.049724578857421875, + "step": 3259 + }, + { + "epoch": 0.22035960524536974, + "grad_norm": 0.3059399425983429, + "learning_rate": 2.734389284166362e-05, + "loss": 0.05621337890625, + "step": 3260 + }, + { + "epoch": 0.2204272002163039, + "grad_norm": 0.5717666149139404, + "learning_rate": 2.734201792059986e-05, + "loss": 0.08344268798828125, + "step": 3261 + }, + { + "epoch": 0.22049479518723808, + "grad_norm": 0.29644083976745605, + "learning_rate": 2.7340142402352513e-05, + "loss": 0.033199310302734375, + "step": 3262 + }, + { + "epoch": 0.22056239015817222, + "grad_norm": 0.6156085133552551, + "learning_rate": 2.7338266287012336e-05, + "loss": 0.1359100341796875, + "step": 3263 + }, + { + "epoch": 0.2206299851291064, + "grad_norm": 0.34184136986732483, + "learning_rate": 2.7336389574670104e-05, + "loss": 0.06499481201171875, + "step": 3264 + }, + { + "epoch": 0.22069758010004056, + "grad_norm": 0.12089470028877258, + "learning_rate": 2.7334512265416626e-05, + "loss": 0.021738052368164062, + "step": 3265 + }, + { + "epoch": 0.2207651750709747, + "grad_norm": 1.005769968032837, + "learning_rate": 2.7332634359342736e-05, + "loss": 0.244110107421875, + "step": 3266 + }, + { + "epoch": 0.22083277004190888, + "grad_norm": 0.6251969337463379, + "learning_rate": 2.73307558565393e-05, + "loss": 0.147003173828125, + "step": 3267 + }, + { + "epoch": 0.22090036501284305, + "grad_norm": 0.560516357421875, + "learning_rate": 2.732887675709721e-05, + "loss": 0.0972900390625, + "step": 3268 + }, + { + "epoch": 0.22096795998377722, + "grad_norm": 0.6541900038719177, + "learning_rate": 2.7326997061107392e-05, + "loss": 0.1221923828125, + "step": 3269 + }, + { + "epoch": 0.22103555495471136, + "grad_norm": 0.3823329210281372, + "learning_rate": 2.7325116768660798e-05, + "loss": 0.0834503173828125, + "step": 3270 + }, + { + "epoch": 0.22110314992564553, + "grad_norm": 0.7308245301246643, + "learning_rate": 2.73232358798484e-05, + "loss": 0.13242340087890625, + "step": 3271 + }, + { + "epoch": 0.2211707448965797, + "grad_norm": 0.6914088129997253, + "learning_rate": 2.7321354394761216e-05, + "loss": 0.1324615478515625, + "step": 3272 + }, + { + "epoch": 0.22123833986751384, + "grad_norm": 0.3479551672935486, + "learning_rate": 2.731947231349028e-05, + "loss": 0.0797882080078125, + "step": 3273 + }, + { + "epoch": 0.22130593483844802, + "grad_norm": 0.5961132049560547, + "learning_rate": 2.731758963612666e-05, + "loss": 0.1605224609375, + "step": 3274 + }, + { + "epoch": 0.22137352980938219, + "grad_norm": 0.3105708360671997, + "learning_rate": 2.7315706362761453e-05, + "loss": 0.034473419189453125, + "step": 3275 + }, + { + "epoch": 0.22144112478031636, + "grad_norm": 2.868466377258301, + "learning_rate": 2.7313822493485775e-05, + "loss": 0.3074951171875, + "step": 3276 + }, + { + "epoch": 0.2215087197512505, + "grad_norm": 0.7974432110786438, + "learning_rate": 2.7311938028390792e-05, + "loss": 0.1813201904296875, + "step": 3277 + }, + { + "epoch": 0.22157631472218467, + "grad_norm": 0.9921379685401917, + "learning_rate": 2.731005296756768e-05, + "loss": 0.1340179443359375, + "step": 3278 + }, + { + "epoch": 0.22164390969311884, + "grad_norm": 0.7347827553749084, + "learning_rate": 2.7308167311107646e-05, + "loss": 0.108245849609375, + "step": 3279 + }, + { + "epoch": 0.22171150466405298, + "grad_norm": 0.4398522973060608, + "learning_rate": 2.730628105910194e-05, + "loss": 0.080718994140625, + "step": 3280 + }, + { + "epoch": 0.22177909963498715, + "grad_norm": 0.9742440581321716, + "learning_rate": 2.730439421164182e-05, + "loss": 0.12121772766113281, + "step": 3281 + }, + { + "epoch": 0.22184669460592132, + "grad_norm": 0.2725662589073181, + "learning_rate": 2.7302506768818587e-05, + "loss": 0.058506011962890625, + "step": 3282 + }, + { + "epoch": 0.2219142895768555, + "grad_norm": 1.215789794921875, + "learning_rate": 2.7300618730723567e-05, + "loss": 0.18914794921875, + "step": 3283 + }, + { + "epoch": 0.22198188454778964, + "grad_norm": 0.6601470112800598, + "learning_rate": 2.7298730097448124e-05, + "loss": 0.1401824951171875, + "step": 3284 + }, + { + "epoch": 0.2220494795187238, + "grad_norm": 0.3149113059043884, + "learning_rate": 2.729684086908363e-05, + "loss": 0.044162750244140625, + "step": 3285 + }, + { + "epoch": 0.22211707448965798, + "grad_norm": 0.9486392140388489, + "learning_rate": 2.7294951045721495e-05, + "loss": 0.114959716796875, + "step": 3286 + }, + { + "epoch": 0.22218466946059212, + "grad_norm": 0.7399736642837524, + "learning_rate": 2.7293060627453173e-05, + "loss": 0.1381683349609375, + "step": 3287 + }, + { + "epoch": 0.2222522644315263, + "grad_norm": 0.7070549130439758, + "learning_rate": 2.729116961437013e-05, + "loss": 0.17469024658203125, + "step": 3288 + }, + { + "epoch": 0.22231985940246046, + "grad_norm": 0.3397762179374695, + "learning_rate": 2.7289278006563854e-05, + "loss": 0.0748291015625, + "step": 3289 + }, + { + "epoch": 0.22238745437339463, + "grad_norm": 0.7532287836074829, + "learning_rate": 2.728738580412589e-05, + "loss": 0.15313720703125, + "step": 3290 + }, + { + "epoch": 0.22245504934432878, + "grad_norm": 1.1377888917922974, + "learning_rate": 2.7285493007147785e-05, + "loss": 0.1801910400390625, + "step": 3291 + }, + { + "epoch": 0.22252264431526295, + "grad_norm": 0.6249300241470337, + "learning_rate": 2.728359961572113e-05, + "loss": 0.11667633056640625, + "step": 3292 + }, + { + "epoch": 0.22259023928619712, + "grad_norm": 0.8114367723464966, + "learning_rate": 2.728170562993753e-05, + "loss": 0.1075592041015625, + "step": 3293 + }, + { + "epoch": 0.22265783425713126, + "grad_norm": 0.2619948387145996, + "learning_rate": 2.727981104988863e-05, + "loss": 0.0454864501953125, + "step": 3294 + }, + { + "epoch": 0.22272542922806543, + "grad_norm": 1.2379686832427979, + "learning_rate": 2.7277915875666106e-05, + "loss": 0.13035964965820312, + "step": 3295 + }, + { + "epoch": 0.2227930241989996, + "grad_norm": 0.598238468170166, + "learning_rate": 2.7276020107361656e-05, + "loss": 0.12403106689453125, + "step": 3296 + }, + { + "epoch": 0.22286061916993374, + "grad_norm": 0.8022485375404358, + "learning_rate": 2.727412374506701e-05, + "loss": 0.17535400390625, + "step": 3297 + }, + { + "epoch": 0.22292821414086791, + "grad_norm": 1.4610021114349365, + "learning_rate": 2.7272226788873924e-05, + "loss": 0.2410888671875, + "step": 3298 + }, + { + "epoch": 0.22299580911180208, + "grad_norm": 0.5125605463981628, + "learning_rate": 2.727032923887419e-05, + "loss": 0.13529205322265625, + "step": 3299 + }, + { + "epoch": 0.22306340408273626, + "grad_norm": 0.9503492116928101, + "learning_rate": 2.7268431095159614e-05, + "loss": 0.194305419921875, + "step": 3300 + }, + { + "epoch": 0.2231309990536704, + "grad_norm": 0.4019469618797302, + "learning_rate": 2.7266532357822046e-05, + "loss": 0.06281661987304688, + "step": 3301 + }, + { + "epoch": 0.22319859402460457, + "grad_norm": 0.4144556522369385, + "learning_rate": 2.726463302695336e-05, + "loss": 0.05385589599609375, + "step": 3302 + }, + { + "epoch": 0.22326618899553874, + "grad_norm": 0.7977414131164551, + "learning_rate": 2.7262733102645458e-05, + "loss": 0.1539459228515625, + "step": 3303 + }, + { + "epoch": 0.22333378396647288, + "grad_norm": 0.31558114290237427, + "learning_rate": 2.726083258499026e-05, + "loss": 0.0554656982421875, + "step": 3304 + }, + { + "epoch": 0.22340137893740705, + "grad_norm": 1.2124205827713013, + "learning_rate": 2.725893147407974e-05, + "loss": 0.13404083251953125, + "step": 3305 + }, + { + "epoch": 0.22346897390834122, + "grad_norm": 0.7039981484413147, + "learning_rate": 2.725702977000587e-05, + "loss": 0.15777587890625, + "step": 3306 + }, + { + "epoch": 0.2235365688792754, + "grad_norm": 0.432883620262146, + "learning_rate": 2.725512747286068e-05, + "loss": 0.05046844482421875, + "step": 3307 + }, + { + "epoch": 0.22360416385020954, + "grad_norm": 0.58405601978302, + "learning_rate": 2.7253224582736205e-05, + "loss": 0.05397796630859375, + "step": 3308 + }, + { + "epoch": 0.2236717588211437, + "grad_norm": 0.5670505166053772, + "learning_rate": 2.7251321099724528e-05, + "loss": 0.122100830078125, + "step": 3309 + }, + { + "epoch": 0.22373935379207788, + "grad_norm": 0.3037446141242981, + "learning_rate": 2.724941702391774e-05, + "loss": 0.06655120849609375, + "step": 3310 + }, + { + "epoch": 0.22380694876301202, + "grad_norm": 1.9423598051071167, + "learning_rate": 2.7247512355407983e-05, + "loss": 0.2623291015625, + "step": 3311 + }, + { + "epoch": 0.2238745437339462, + "grad_norm": 0.891626238822937, + "learning_rate": 2.724560709428741e-05, + "loss": 0.13555145263671875, + "step": 3312 + }, + { + "epoch": 0.22394213870488036, + "grad_norm": 0.8264286518096924, + "learning_rate": 2.724370124064821e-05, + "loss": 0.1371307373046875, + "step": 3313 + }, + { + "epoch": 0.22400973367581453, + "grad_norm": 0.6424928903579712, + "learning_rate": 2.7241794794582598e-05, + "loss": 0.1110382080078125, + "step": 3314 + }, + { + "epoch": 0.22407732864674867, + "grad_norm": 0.8087763786315918, + "learning_rate": 2.723988775618283e-05, + "loss": 0.08563232421875, + "step": 3315 + }, + { + "epoch": 0.22414492361768285, + "grad_norm": 1.0984066724777222, + "learning_rate": 2.7237980125541173e-05, + "loss": 0.1565093994140625, + "step": 3316 + }, + { + "epoch": 0.22421251858861702, + "grad_norm": 0.49340370297431946, + "learning_rate": 2.7236071902749926e-05, + "loss": 0.04969024658203125, + "step": 3317 + }, + { + "epoch": 0.22428011355955116, + "grad_norm": 1.155189871788025, + "learning_rate": 2.7234163087901427e-05, + "loss": 0.170989990234375, + "step": 3318 + }, + { + "epoch": 0.22434770853048533, + "grad_norm": 0.2992880344390869, + "learning_rate": 2.7232253681088034e-05, + "loss": 0.040203094482421875, + "step": 3319 + }, + { + "epoch": 0.2244153035014195, + "grad_norm": 0.9736011624336243, + "learning_rate": 2.7230343682402136e-05, + "loss": 0.1519012451171875, + "step": 3320 + }, + { + "epoch": 0.22448289847235367, + "grad_norm": 1.014105200767517, + "learning_rate": 2.7228433091936152e-05, + "loss": 0.14190673828125, + "step": 3321 + }, + { + "epoch": 0.2245504934432878, + "grad_norm": 0.6375427842140198, + "learning_rate": 2.7226521909782525e-05, + "loss": 0.0768890380859375, + "step": 3322 + }, + { + "epoch": 0.22461808841422198, + "grad_norm": 0.4073585569858551, + "learning_rate": 2.7224610136033733e-05, + "loss": 0.0611724853515625, + "step": 3323 + }, + { + "epoch": 0.22468568338515615, + "grad_norm": 1.2117401361465454, + "learning_rate": 2.722269777078228e-05, + "loss": 0.10733795166015625, + "step": 3324 + }, + { + "epoch": 0.2247532783560903, + "grad_norm": 0.36076533794403076, + "learning_rate": 2.7220784814120694e-05, + "loss": 0.07109832763671875, + "step": 3325 + }, + { + "epoch": 0.22482087332702447, + "grad_norm": 0.856400728225708, + "learning_rate": 2.7218871266141542e-05, + "loss": 0.08419418334960938, + "step": 3326 + }, + { + "epoch": 0.22488846829795864, + "grad_norm": 0.5192115306854248, + "learning_rate": 2.7216957126937407e-05, + "loss": 0.081756591796875, + "step": 3327 + }, + { + "epoch": 0.22495606326889278, + "grad_norm": 0.7765573859214783, + "learning_rate": 2.7215042396600912e-05, + "loss": 0.10919189453125, + "step": 3328 + }, + { + "epoch": 0.22502365823982695, + "grad_norm": 0.6095716953277588, + "learning_rate": 2.7213127075224697e-05, + "loss": 0.156585693359375, + "step": 3329 + }, + { + "epoch": 0.22509125321076112, + "grad_norm": 1.2819457054138184, + "learning_rate": 2.7211211162901446e-05, + "loss": 0.207672119140625, + "step": 3330 + }, + { + "epoch": 0.2251588481816953, + "grad_norm": 0.701347291469574, + "learning_rate": 2.7209294659723856e-05, + "loss": 0.138824462890625, + "step": 3331 + }, + { + "epoch": 0.22522644315262944, + "grad_norm": 0.9083551168441772, + "learning_rate": 2.7207377565784666e-05, + "loss": 0.1602783203125, + "step": 3332 + }, + { + "epoch": 0.2252940381235636, + "grad_norm": 1.0796529054641724, + "learning_rate": 2.720545988117663e-05, + "loss": 0.14368438720703125, + "step": 3333 + }, + { + "epoch": 0.22536163309449778, + "grad_norm": 0.31000861525535583, + "learning_rate": 2.7203541605992538e-05, + "loss": 0.0481719970703125, + "step": 3334 + }, + { + "epoch": 0.22542922806543192, + "grad_norm": 1.1021867990493774, + "learning_rate": 2.7201622740325208e-05, + "loss": 0.152587890625, + "step": 3335 + }, + { + "epoch": 0.2254968230363661, + "grad_norm": 0.6008150577545166, + "learning_rate": 2.719970328426749e-05, + "loss": 0.13397216796875, + "step": 3336 + }, + { + "epoch": 0.22556441800730026, + "grad_norm": 0.7253993153572083, + "learning_rate": 2.7197783237912262e-05, + "loss": 0.1523590087890625, + "step": 3337 + }, + { + "epoch": 0.22563201297823443, + "grad_norm": 1.220421314239502, + "learning_rate": 2.7195862601352424e-05, + "loss": 0.15130615234375, + "step": 3338 + }, + { + "epoch": 0.22569960794916857, + "grad_norm": 1.4049999713897705, + "learning_rate": 2.7193941374680904e-05, + "loss": 0.1728515625, + "step": 3339 + }, + { + "epoch": 0.22576720292010274, + "grad_norm": 0.47095823287963867, + "learning_rate": 2.719201955799067e-05, + "loss": 0.0634613037109375, + "step": 3340 + }, + { + "epoch": 0.22583479789103691, + "grad_norm": 1.1728373765945435, + "learning_rate": 2.719009715137471e-05, + "loss": 0.1478271484375, + "step": 3341 + }, + { + "epoch": 0.22590239286197106, + "grad_norm": 0.5299354791641235, + "learning_rate": 2.718817415492603e-05, + "loss": 0.0545196533203125, + "step": 3342 + }, + { + "epoch": 0.22596998783290523, + "grad_norm": 1.1609718799591064, + "learning_rate": 2.7186250568737695e-05, + "loss": 0.177001953125, + "step": 3343 + }, + { + "epoch": 0.2260375828038394, + "grad_norm": 0.3362341523170471, + "learning_rate": 2.7184326392902773e-05, + "loss": 0.06931304931640625, + "step": 3344 + }, + { + "epoch": 0.22610517777477357, + "grad_norm": 0.449603796005249, + "learning_rate": 2.7182401627514366e-05, + "loss": 0.07611083984375, + "step": 3345 + }, + { + "epoch": 0.2261727727457077, + "grad_norm": 0.763382077217102, + "learning_rate": 2.7180476272665604e-05, + "loss": 0.10800361633300781, + "step": 3346 + }, + { + "epoch": 0.22624036771664188, + "grad_norm": 0.7863945364952087, + "learning_rate": 2.7178550328449657e-05, + "loss": 0.12432861328125, + "step": 3347 + }, + { + "epoch": 0.22630796268757605, + "grad_norm": 0.3754102885723114, + "learning_rate": 2.71766237949597e-05, + "loss": 0.0615234375, + "step": 3348 + }, + { + "epoch": 0.2263755576585102, + "grad_norm": 0.2518337666988373, + "learning_rate": 2.717469667228896e-05, + "loss": 0.04668426513671875, + "step": 3349 + }, + { + "epoch": 0.22644315262944437, + "grad_norm": 0.44346198439598083, + "learning_rate": 2.7172768960530684e-05, + "loss": 0.061553955078125, + "step": 3350 + }, + { + "epoch": 0.22651074760037854, + "grad_norm": 1.1428111791610718, + "learning_rate": 2.7170840659778144e-05, + "loss": 0.22222900390625, + "step": 3351 + }, + { + "epoch": 0.2265783425713127, + "grad_norm": 0.35024046897888184, + "learning_rate": 2.7168911770124636e-05, + "loss": 0.07954788208007812, + "step": 3352 + }, + { + "epoch": 0.22664593754224685, + "grad_norm": 0.8875933289527893, + "learning_rate": 2.7166982291663507e-05, + "loss": 0.178924560546875, + "step": 3353 + }, + { + "epoch": 0.22671353251318102, + "grad_norm": 0.3413335084915161, + "learning_rate": 2.7165052224488106e-05, + "loss": 0.04807281494140625, + "step": 3354 + }, + { + "epoch": 0.2267811274841152, + "grad_norm": 0.5012961626052856, + "learning_rate": 2.7163121568691827e-05, + "loss": 0.10973358154296875, + "step": 3355 + }, + { + "epoch": 0.22684872245504933, + "grad_norm": 0.29349884390830994, + "learning_rate": 2.716119032436808e-05, + "loss": 0.043605804443359375, + "step": 3356 + }, + { + "epoch": 0.2269163174259835, + "grad_norm": 0.8233620524406433, + "learning_rate": 2.7159258491610317e-05, + "loss": 0.1007232666015625, + "step": 3357 + }, + { + "epoch": 0.22698391239691768, + "grad_norm": 0.7365236878395081, + "learning_rate": 2.7157326070512013e-05, + "loss": 0.1563262939453125, + "step": 3358 + }, + { + "epoch": 0.22705150736785182, + "grad_norm": 0.25852638483047485, + "learning_rate": 2.715539306116667e-05, + "loss": 0.073486328125, + "step": 3359 + }, + { + "epoch": 0.227119102338786, + "grad_norm": 0.8701937794685364, + "learning_rate": 2.7153459463667812e-05, + "loss": 0.185638427734375, + "step": 3360 + }, + { + "epoch": 0.22718669730972016, + "grad_norm": 0.40538308024406433, + "learning_rate": 2.7151525278109005e-05, + "loss": 0.07781982421875, + "step": 3361 + }, + { + "epoch": 0.22725429228065433, + "grad_norm": 0.47947123646736145, + "learning_rate": 2.714959050458384e-05, + "loss": 0.0799407958984375, + "step": 3362 + }, + { + "epoch": 0.22732188725158847, + "grad_norm": 0.7381042838096619, + "learning_rate": 2.7147655143185923e-05, + "loss": 0.07550048828125, + "step": 3363 + }, + { + "epoch": 0.22738948222252264, + "grad_norm": 1.4695639610290527, + "learning_rate": 2.7145719194008907e-05, + "loss": 0.1932373046875, + "step": 3364 + }, + { + "epoch": 0.2274570771934568, + "grad_norm": 0.3391636908054352, + "learning_rate": 2.714378265714646e-05, + "loss": 0.043304443359375, + "step": 3365 + }, + { + "epoch": 0.22752467216439096, + "grad_norm": 0.27010515332221985, + "learning_rate": 2.714184553269229e-05, + "loss": 0.035091400146484375, + "step": 3366 + }, + { + "epoch": 0.22759226713532513, + "grad_norm": 0.7244620323181152, + "learning_rate": 2.7139907820740123e-05, + "loss": 0.142242431640625, + "step": 3367 + }, + { + "epoch": 0.2276598621062593, + "grad_norm": 0.38261693716049194, + "learning_rate": 2.7137969521383723e-05, + "loss": 0.0901336669921875, + "step": 3368 + }, + { + "epoch": 0.22772745707719347, + "grad_norm": 0.3097016513347626, + "learning_rate": 2.7136030634716866e-05, + "loss": 0.08092498779296875, + "step": 3369 + }, + { + "epoch": 0.2277950520481276, + "grad_norm": 0.6391309499740601, + "learning_rate": 2.713409116083338e-05, + "loss": 0.1529541015625, + "step": 3370 + }, + { + "epoch": 0.22786264701906178, + "grad_norm": 0.5884878039360046, + "learning_rate": 2.7132151099827095e-05, + "loss": 0.09116172790527344, + "step": 3371 + }, + { + "epoch": 0.22793024198999595, + "grad_norm": 0.6560859680175781, + "learning_rate": 2.7130210451791894e-05, + "loss": 0.1212921142578125, + "step": 3372 + }, + { + "epoch": 0.2279978369609301, + "grad_norm": 1.3842719793319702, + "learning_rate": 2.7128269216821672e-05, + "loss": 0.12201690673828125, + "step": 3373 + }, + { + "epoch": 0.22806543193186427, + "grad_norm": 0.48852190375328064, + "learning_rate": 2.7126327395010368e-05, + "loss": 0.09334564208984375, + "step": 3374 + }, + { + "epoch": 0.22813302690279844, + "grad_norm": 1.0057456493377686, + "learning_rate": 2.712438498645192e-05, + "loss": 0.11110115051269531, + "step": 3375 + }, + { + "epoch": 0.2282006218737326, + "grad_norm": 0.6443114280700684, + "learning_rate": 2.712244199124034e-05, + "loss": 0.06946563720703125, + "step": 3376 + }, + { + "epoch": 0.22826821684466675, + "grad_norm": 0.41558757424354553, + "learning_rate": 2.7120498409469615e-05, + "loss": 0.093292236328125, + "step": 3377 + }, + { + "epoch": 0.22833581181560092, + "grad_norm": 0.8196434378623962, + "learning_rate": 2.7118554241233807e-05, + "loss": 0.1264495849609375, + "step": 3378 + }, + { + "epoch": 0.2284034067865351, + "grad_norm": 0.9442164301872253, + "learning_rate": 2.711660948662698e-05, + "loss": 0.19989013671875, + "step": 3379 + }, + { + "epoch": 0.22847100175746923, + "grad_norm": 0.4032846987247467, + "learning_rate": 2.7114664145743235e-05, + "loss": 0.06683731079101562, + "step": 3380 + }, + { + "epoch": 0.2285385967284034, + "grad_norm": 1.123456597328186, + "learning_rate": 2.711271821867669e-05, + "loss": 0.1218719482421875, + "step": 3381 + }, + { + "epoch": 0.22860619169933757, + "grad_norm": 0.4196985065937042, + "learning_rate": 2.7110771705521516e-05, + "loss": 0.0877532958984375, + "step": 3382 + }, + { + "epoch": 0.22867378667027174, + "grad_norm": 1.2711647748947144, + "learning_rate": 2.7108824606371892e-05, + "loss": 0.215362548828125, + "step": 3383 + }, + { + "epoch": 0.2287413816412059, + "grad_norm": 0.6818634867668152, + "learning_rate": 2.7106876921322027e-05, + "loss": 0.1485595703125, + "step": 3384 + }, + { + "epoch": 0.22880897661214006, + "grad_norm": 0.9935893416404724, + "learning_rate": 2.7104928650466167e-05, + "loss": 0.15246200561523438, + "step": 3385 + }, + { + "epoch": 0.22887657158307423, + "grad_norm": 0.312103271484375, + "learning_rate": 2.7102979793898574e-05, + "loss": 0.0627288818359375, + "step": 3386 + }, + { + "epoch": 0.22894416655400837, + "grad_norm": 0.33810415863990784, + "learning_rate": 2.7101030351713557e-05, + "loss": 0.06322097778320312, + "step": 3387 + }, + { + "epoch": 0.22901176152494254, + "grad_norm": 0.4213429093360901, + "learning_rate": 2.7099080324005435e-05, + "loss": 0.07965850830078125, + "step": 3388 + }, + { + "epoch": 0.2290793564958767, + "grad_norm": 0.39253586530685425, + "learning_rate": 2.709712971086856e-05, + "loss": 0.083221435546875, + "step": 3389 + }, + { + "epoch": 0.22914695146681088, + "grad_norm": 0.9044973850250244, + "learning_rate": 2.709517851239732e-05, + "loss": 0.12083053588867188, + "step": 3390 + }, + { + "epoch": 0.22921454643774503, + "grad_norm": 0.22480151057243347, + "learning_rate": 2.709322672868613e-05, + "loss": 0.05715370178222656, + "step": 3391 + }, + { + "epoch": 0.2292821414086792, + "grad_norm": 0.30478790402412415, + "learning_rate": 2.7091274359829413e-05, + "loss": 0.07151031494140625, + "step": 3392 + }, + { + "epoch": 0.22934973637961337, + "grad_norm": 0.3244346082210541, + "learning_rate": 2.7089321405921654e-05, + "loss": 0.0683746337890625, + "step": 3393 + }, + { + "epoch": 0.2294173313505475, + "grad_norm": 0.4313850402832031, + "learning_rate": 2.7087367867057342e-05, + "loss": 0.06947040557861328, + "step": 3394 + }, + { + "epoch": 0.22948492632148168, + "grad_norm": 0.2751765251159668, + "learning_rate": 2.7085413743331004e-05, + "loss": 0.04152679443359375, + "step": 3395 + }, + { + "epoch": 0.22955252129241585, + "grad_norm": 0.2622685134410858, + "learning_rate": 2.7083459034837188e-05, + "loss": 0.035831451416015625, + "step": 3396 + }, + { + "epoch": 0.22962011626335, + "grad_norm": 0.23548012971878052, + "learning_rate": 2.7081503741670478e-05, + "loss": 0.039337158203125, + "step": 3397 + }, + { + "epoch": 0.22968771123428416, + "grad_norm": 0.6798418164253235, + "learning_rate": 2.7079547863925484e-05, + "loss": 0.21087646484375, + "step": 3398 + }, + { + "epoch": 0.22975530620521833, + "grad_norm": 0.37532809376716614, + "learning_rate": 2.7077591401696842e-05, + "loss": 0.06664276123046875, + "step": 3399 + }, + { + "epoch": 0.2298229011761525, + "grad_norm": 0.9483903050422668, + "learning_rate": 2.7075634355079215e-05, + "loss": 0.119140625, + "step": 3400 + }, + { + "epoch": 0.22989049614708665, + "grad_norm": 1.2748199701309204, + "learning_rate": 2.7073676724167307e-05, + "loss": 0.17244720458984375, + "step": 3401 + }, + { + "epoch": 0.22995809111802082, + "grad_norm": 0.2930472195148468, + "learning_rate": 2.707171850905583e-05, + "loss": 0.047210693359375, + "step": 3402 + }, + { + "epoch": 0.230025686088955, + "grad_norm": 0.8808965086936951, + "learning_rate": 2.7069759709839537e-05, + "loss": 0.1470184326171875, + "step": 3403 + }, + { + "epoch": 0.23009328105988913, + "grad_norm": 0.529309868812561, + "learning_rate": 2.706780032661321e-05, + "loss": 0.0792083740234375, + "step": 3404 + }, + { + "epoch": 0.2301608760308233, + "grad_norm": 1.2573155164718628, + "learning_rate": 2.7065840359471656e-05, + "loss": 0.1967620849609375, + "step": 3405 + }, + { + "epoch": 0.23022847100175747, + "grad_norm": 0.9235398769378662, + "learning_rate": 2.7063879808509705e-05, + "loss": 0.175445556640625, + "step": 3406 + }, + { + "epoch": 0.23029606597269164, + "grad_norm": 0.32241737842559814, + "learning_rate": 2.7061918673822226e-05, + "loss": 0.030378341674804688, + "step": 3407 + }, + { + "epoch": 0.2303636609436258, + "grad_norm": 0.4081879258155823, + "learning_rate": 2.705995695550411e-05, + "loss": 0.07021331787109375, + "step": 3408 + }, + { + "epoch": 0.23043125591455996, + "grad_norm": 1.1387337446212769, + "learning_rate": 2.7057994653650275e-05, + "loss": 0.21561431884765625, + "step": 3409 + }, + { + "epoch": 0.23049885088549413, + "grad_norm": 1.3574986457824707, + "learning_rate": 2.705603176835567e-05, + "loss": 0.2611083984375, + "step": 3410 + }, + { + "epoch": 0.23056644585642827, + "grad_norm": 0.6573098301887512, + "learning_rate": 2.7054068299715275e-05, + "loss": 0.0682525634765625, + "step": 3411 + }, + { + "epoch": 0.23063404082736244, + "grad_norm": 0.6839215159416199, + "learning_rate": 2.7052104247824092e-05, + "loss": 0.173919677734375, + "step": 3412 + }, + { + "epoch": 0.2307016357982966, + "grad_norm": 0.9521481990814209, + "learning_rate": 2.7050139612777153e-05, + "loss": 0.18640899658203125, + "step": 3413 + }, + { + "epoch": 0.23076923076923078, + "grad_norm": 0.7139975428581238, + "learning_rate": 2.704817439466952e-05, + "loss": 0.1159820556640625, + "step": 3414 + }, + { + "epoch": 0.23083682574016492, + "grad_norm": 0.5543283224105835, + "learning_rate": 2.7046208593596285e-05, + "loss": 0.118438720703125, + "step": 3415 + }, + { + "epoch": 0.2309044207110991, + "grad_norm": 0.2983923852443695, + "learning_rate": 2.7044242209652563e-05, + "loss": 0.084716796875, + "step": 3416 + }, + { + "epoch": 0.23097201568203327, + "grad_norm": 1.7412002086639404, + "learning_rate": 2.70422752429335e-05, + "loss": 0.17971038818359375, + "step": 3417 + }, + { + "epoch": 0.2310396106529674, + "grad_norm": 0.5916793942451477, + "learning_rate": 2.7040307693534272e-05, + "loss": 0.1757659912109375, + "step": 3418 + }, + { + "epoch": 0.23110720562390158, + "grad_norm": 0.4124595522880554, + "learning_rate": 2.703833956155008e-05, + "loss": 0.09061050415039062, + "step": 3419 + }, + { + "epoch": 0.23117480059483575, + "grad_norm": 1.7677608728408813, + "learning_rate": 2.703637084707615e-05, + "loss": 0.2291259765625, + "step": 3420 + }, + { + "epoch": 0.23124239556576992, + "grad_norm": 0.14141389727592468, + "learning_rate": 2.7034401550207753e-05, + "loss": 0.0331268310546875, + "step": 3421 + }, + { + "epoch": 0.23130999053670406, + "grad_norm": 0.4266669750213623, + "learning_rate": 2.7032431671040162e-05, + "loss": 0.10506439208984375, + "step": 3422 + }, + { + "epoch": 0.23137758550763823, + "grad_norm": 0.8170414566993713, + "learning_rate": 2.7030461209668696e-05, + "loss": 0.145111083984375, + "step": 3423 + }, + { + "epoch": 0.2314451804785724, + "grad_norm": 0.7954815030097961, + "learning_rate": 2.7028490166188708e-05, + "loss": 0.173828125, + "step": 3424 + }, + { + "epoch": 0.23151277544950655, + "grad_norm": 1.1101852655410767, + "learning_rate": 2.702651854069555e-05, + "loss": 0.142669677734375, + "step": 3425 + }, + { + "epoch": 0.23158037042044072, + "grad_norm": 0.562369704246521, + "learning_rate": 2.702454633328464e-05, + "loss": 0.066680908203125, + "step": 3426 + }, + { + "epoch": 0.2316479653913749, + "grad_norm": 0.7074031233787537, + "learning_rate": 2.70225735440514e-05, + "loss": 0.077667236328125, + "step": 3427 + }, + { + "epoch": 0.23171556036230903, + "grad_norm": 0.740540623664856, + "learning_rate": 2.702060017309128e-05, + "loss": 0.1576690673828125, + "step": 3428 + }, + { + "epoch": 0.2317831553332432, + "grad_norm": 0.6769096851348877, + "learning_rate": 2.7018626220499773e-05, + "loss": 0.08492279052734375, + "step": 3429 + }, + { + "epoch": 0.23185075030417737, + "grad_norm": 0.49106496572494507, + "learning_rate": 2.701665168637238e-05, + "loss": 0.08407020568847656, + "step": 3430 + }, + { + "epoch": 0.23191834527511154, + "grad_norm": 0.20690615475177765, + "learning_rate": 2.7014676570804656e-05, + "loss": 0.045337677001953125, + "step": 3431 + }, + { + "epoch": 0.23198594024604569, + "grad_norm": 0.9420359134674072, + "learning_rate": 2.7012700873892155e-05, + "loss": 0.14114761352539062, + "step": 3432 + }, + { + "epoch": 0.23205353521697986, + "grad_norm": 0.829164445400238, + "learning_rate": 2.701072459573048e-05, + "loss": 0.196258544921875, + "step": 3433 + }, + { + "epoch": 0.23212113018791403, + "grad_norm": 0.7085576057434082, + "learning_rate": 2.7008747736415255e-05, + "loss": 0.1137237548828125, + "step": 3434 + }, + { + "epoch": 0.23218872515884817, + "grad_norm": 0.6633200645446777, + "learning_rate": 2.7006770296042137e-05, + "loss": 0.1195526123046875, + "step": 3435 + }, + { + "epoch": 0.23225632012978234, + "grad_norm": 0.75583416223526, + "learning_rate": 2.70047922747068e-05, + "loss": 0.1137847900390625, + "step": 3436 + }, + { + "epoch": 0.2323239151007165, + "grad_norm": 0.9605758190155029, + "learning_rate": 2.7002813672504957e-05, + "loss": 0.14624786376953125, + "step": 3437 + }, + { + "epoch": 0.23239151007165068, + "grad_norm": 0.7631930708885193, + "learning_rate": 2.700083448953234e-05, + "loss": 0.12773895263671875, + "step": 3438 + }, + { + "epoch": 0.23245910504258482, + "grad_norm": 0.6175365447998047, + "learning_rate": 2.699885472588472e-05, + "loss": 0.11390304565429688, + "step": 3439 + }, + { + "epoch": 0.232526700013519, + "grad_norm": 1.439832329750061, + "learning_rate": 2.699687438165789e-05, + "loss": 0.201446533203125, + "step": 3440 + }, + { + "epoch": 0.23259429498445316, + "grad_norm": 0.7938131093978882, + "learning_rate": 2.699489345694767e-05, + "loss": 0.116119384765625, + "step": 3441 + }, + { + "epoch": 0.2326618899553873, + "grad_norm": 0.9655596017837524, + "learning_rate": 2.6992911951849907e-05, + "loss": 0.1397705078125, + "step": 3442 + }, + { + "epoch": 0.23272948492632148, + "grad_norm": 0.6135134696960449, + "learning_rate": 2.6990929866460483e-05, + "loss": 0.06822586059570312, + "step": 3443 + }, + { + "epoch": 0.23279707989725565, + "grad_norm": 1.3672988414764404, + "learning_rate": 2.69889472008753e-05, + "loss": 0.2614288330078125, + "step": 3444 + }, + { + "epoch": 0.23286467486818982, + "grad_norm": 0.8834191560745239, + "learning_rate": 2.698696395519029e-05, + "loss": 0.17950439453125, + "step": 3445 + }, + { + "epoch": 0.23293226983912396, + "grad_norm": 0.39606982469558716, + "learning_rate": 2.698498012950142e-05, + "loss": 0.06207275390625, + "step": 3446 + }, + { + "epoch": 0.23299986481005813, + "grad_norm": 0.828124463558197, + "learning_rate": 2.698299572390468e-05, + "loss": 0.1076202392578125, + "step": 3447 + }, + { + "epoch": 0.2330674597809923, + "grad_norm": 0.3603193163871765, + "learning_rate": 2.6981010738496082e-05, + "loss": 0.06907272338867188, + "step": 3448 + }, + { + "epoch": 0.23313505475192645, + "grad_norm": 0.8034129738807678, + "learning_rate": 2.6979025173371675e-05, + "loss": 0.0914764404296875, + "step": 3449 + }, + { + "epoch": 0.23320264972286062, + "grad_norm": 0.827307939529419, + "learning_rate": 2.6977039028627538e-05, + "loss": 0.13614654541015625, + "step": 3450 + }, + { + "epoch": 0.2332702446937948, + "grad_norm": 0.526023268699646, + "learning_rate": 2.6975052304359765e-05, + "loss": 0.0926971435546875, + "step": 3451 + }, + { + "epoch": 0.23333783966472896, + "grad_norm": 0.8890734910964966, + "learning_rate": 2.697306500066449e-05, + "loss": 0.1529083251953125, + "step": 3452 + }, + { + "epoch": 0.2334054346356631, + "grad_norm": 1.061214566230774, + "learning_rate": 2.697107711763787e-05, + "loss": 0.1524810791015625, + "step": 3453 + }, + { + "epoch": 0.23347302960659727, + "grad_norm": 0.32474038004875183, + "learning_rate": 2.696908865537609e-05, + "loss": 0.058391571044921875, + "step": 3454 + }, + { + "epoch": 0.23354062457753144, + "grad_norm": 0.6233418583869934, + "learning_rate": 2.696709961397537e-05, + "loss": 0.130889892578125, + "step": 3455 + }, + { + "epoch": 0.23360821954846558, + "grad_norm": 0.7689430117607117, + "learning_rate": 2.6965109993531946e-05, + "loss": 0.1558074951171875, + "step": 3456 + }, + { + "epoch": 0.23367581451939975, + "grad_norm": 1.0929944515228271, + "learning_rate": 2.6963119794142087e-05, + "loss": 0.14644241333007812, + "step": 3457 + }, + { + "epoch": 0.23374340949033393, + "grad_norm": 0.8992771506309509, + "learning_rate": 2.69611290159021e-05, + "loss": 0.1739501953125, + "step": 3458 + }, + { + "epoch": 0.23381100446126807, + "grad_norm": 0.4860808551311493, + "learning_rate": 2.69591376589083e-05, + "loss": 0.0818328857421875, + "step": 3459 + }, + { + "epoch": 0.23387859943220224, + "grad_norm": 0.9558554887771606, + "learning_rate": 2.6957145723257056e-05, + "loss": 0.1608428955078125, + "step": 3460 + }, + { + "epoch": 0.2339461944031364, + "grad_norm": 1.4810115098953247, + "learning_rate": 2.6955153209044733e-05, + "loss": 0.177581787109375, + "step": 3461 + }, + { + "epoch": 0.23401378937407058, + "grad_norm": 0.7329792976379395, + "learning_rate": 2.6953160116367753e-05, + "loss": 0.151611328125, + "step": 3462 + }, + { + "epoch": 0.23408138434500472, + "grad_norm": 0.37295210361480713, + "learning_rate": 2.6951166445322548e-05, + "loss": 0.059929847717285156, + "step": 3463 + }, + { + "epoch": 0.2341489793159389, + "grad_norm": 1.3478622436523438, + "learning_rate": 2.6949172196005592e-05, + "loss": 0.243011474609375, + "step": 3464 + }, + { + "epoch": 0.23421657428687306, + "grad_norm": 0.29408320784568787, + "learning_rate": 2.6947177368513368e-05, + "loss": 0.0472259521484375, + "step": 3465 + }, + { + "epoch": 0.2342841692578072, + "grad_norm": 0.5248720049858093, + "learning_rate": 2.694518196294241e-05, + "loss": 0.0983734130859375, + "step": 3466 + }, + { + "epoch": 0.23435176422874138, + "grad_norm": 1.1328579187393188, + "learning_rate": 2.694318597938926e-05, + "loss": 0.2320556640625, + "step": 3467 + }, + { + "epoch": 0.23441935919967555, + "grad_norm": 0.3278715908527374, + "learning_rate": 2.6941189417950497e-05, + "loss": 0.06203269958496094, + "step": 3468 + }, + { + "epoch": 0.23448695417060972, + "grad_norm": 0.2398069202899933, + "learning_rate": 2.693919227872273e-05, + "loss": 0.03348731994628906, + "step": 3469 + }, + { + "epoch": 0.23455454914154386, + "grad_norm": 0.6354507803916931, + "learning_rate": 2.693719456180259e-05, + "loss": 0.09169769287109375, + "step": 3470 + }, + { + "epoch": 0.23462214411247803, + "grad_norm": 0.8243688941001892, + "learning_rate": 2.693519626728674e-05, + "loss": 0.11455917358398438, + "step": 3471 + }, + { + "epoch": 0.2346897390834122, + "grad_norm": 0.6758399605751038, + "learning_rate": 2.6933197395271872e-05, + "loss": 0.19776153564453125, + "step": 3472 + }, + { + "epoch": 0.23475733405434634, + "grad_norm": 1.0703206062316895, + "learning_rate": 2.69311979458547e-05, + "loss": 0.180206298828125, + "step": 3473 + }, + { + "epoch": 0.23482492902528052, + "grad_norm": 0.5464199185371399, + "learning_rate": 2.6929197919131976e-05, + "loss": 0.15472412109375, + "step": 3474 + }, + { + "epoch": 0.23489252399621469, + "grad_norm": 0.622519314289093, + "learning_rate": 2.692719731520047e-05, + "loss": 0.1369781494140625, + "step": 3475 + }, + { + "epoch": 0.23496011896714886, + "grad_norm": 0.35402050614356995, + "learning_rate": 2.6925196134156978e-05, + "loss": 0.0807952880859375, + "step": 3476 + }, + { + "epoch": 0.235027713938083, + "grad_norm": 1.0585401058197021, + "learning_rate": 2.692319437609834e-05, + "loss": 0.17160415649414062, + "step": 3477 + }, + { + "epoch": 0.23509530890901717, + "grad_norm": 0.6281086206436157, + "learning_rate": 2.6921192041121404e-05, + "loss": 0.10912704467773438, + "step": 3478 + }, + { + "epoch": 0.23516290387995134, + "grad_norm": 0.32056427001953125, + "learning_rate": 2.6919189129323067e-05, + "loss": 0.06277084350585938, + "step": 3479 + }, + { + "epoch": 0.23523049885088548, + "grad_norm": 0.475169837474823, + "learning_rate": 2.6917185640800228e-05, + "loss": 0.0921630859375, + "step": 3480 + }, + { + "epoch": 0.23529809382181965, + "grad_norm": 0.9661656022071838, + "learning_rate": 2.691518157564984e-05, + "loss": 0.15313720703125, + "step": 3481 + }, + { + "epoch": 0.23536568879275382, + "grad_norm": 0.8739187121391296, + "learning_rate": 2.6913176933968864e-05, + "loss": 0.1389923095703125, + "step": 3482 + }, + { + "epoch": 0.235433283763688, + "grad_norm": 0.572083055973053, + "learning_rate": 2.6911171715854305e-05, + "loss": 0.08773040771484375, + "step": 3483 + }, + { + "epoch": 0.23550087873462214, + "grad_norm": 0.38992172479629517, + "learning_rate": 2.6909165921403183e-05, + "loss": 0.077911376953125, + "step": 3484 + }, + { + "epoch": 0.2355684737055563, + "grad_norm": 1.1796520948410034, + "learning_rate": 2.6907159550712552e-05, + "loss": 0.134002685546875, + "step": 3485 + }, + { + "epoch": 0.23563606867649048, + "grad_norm": 1.4230090379714966, + "learning_rate": 2.690515260387949e-05, + "loss": 0.2633819580078125, + "step": 3486 + }, + { + "epoch": 0.23570366364742462, + "grad_norm": 1.2386561632156372, + "learning_rate": 2.6903145081001103e-05, + "loss": 0.252960205078125, + "step": 3487 + }, + { + "epoch": 0.2357712586183588, + "grad_norm": 0.7115809321403503, + "learning_rate": 2.6901136982174537e-05, + "loss": 0.13879013061523438, + "step": 3488 + }, + { + "epoch": 0.23583885358929296, + "grad_norm": 0.3799358010292053, + "learning_rate": 2.6899128307496953e-05, + "loss": 0.059539794921875, + "step": 3489 + }, + { + "epoch": 0.2359064485602271, + "grad_norm": 0.4351961612701416, + "learning_rate": 2.689711905706554e-05, + "loss": 0.08543777465820312, + "step": 3490 + }, + { + "epoch": 0.23597404353116128, + "grad_norm": 0.5167995095252991, + "learning_rate": 2.689510923097752e-05, + "loss": 0.1234130859375, + "step": 3491 + }, + { + "epoch": 0.23604163850209545, + "grad_norm": 0.6316211223602295, + "learning_rate": 2.6893098829330134e-05, + "loss": 0.11906051635742188, + "step": 3492 + }, + { + "epoch": 0.23610923347302962, + "grad_norm": 0.6657699346542358, + "learning_rate": 2.6891087852220674e-05, + "loss": 0.12661361694335938, + "step": 3493 + }, + { + "epoch": 0.23617682844396376, + "grad_norm": 4.14849328994751, + "learning_rate": 2.688907629974643e-05, + "loss": 0.305877685546875, + "step": 3494 + }, + { + "epoch": 0.23624442341489793, + "grad_norm": 1.0985639095306396, + "learning_rate": 2.6887064172004738e-05, + "loss": 0.1907958984375, + "step": 3495 + }, + { + "epoch": 0.2363120183858321, + "grad_norm": 0.8416627049446106, + "learning_rate": 2.688505146909296e-05, + "loss": 0.14712905883789062, + "step": 3496 + }, + { + "epoch": 0.23637961335676624, + "grad_norm": 0.697276771068573, + "learning_rate": 2.6883038191108472e-05, + "loss": 0.14012908935546875, + "step": 3497 + }, + { + "epoch": 0.23644720832770041, + "grad_norm": 0.3971121609210968, + "learning_rate": 2.68810243381487e-05, + "loss": 0.04582977294921875, + "step": 3498 + }, + { + "epoch": 0.23651480329863458, + "grad_norm": 0.26726412773132324, + "learning_rate": 2.687900991031108e-05, + "loss": 0.06357574462890625, + "step": 3499 + }, + { + "epoch": 0.23658239826956876, + "grad_norm": 0.7036029696464539, + "learning_rate": 2.6876994907693094e-05, + "loss": 0.1385955810546875, + "step": 3500 + }, + { + "epoch": 0.2366499932405029, + "grad_norm": 1.02956223487854, + "learning_rate": 2.6874979330392227e-05, + "loss": 0.117401123046875, + "step": 3501 + }, + { + "epoch": 0.23671758821143707, + "grad_norm": 0.6227487921714783, + "learning_rate": 2.6872963178506007e-05, + "loss": 0.1072998046875, + "step": 3502 + }, + { + "epoch": 0.23678518318237124, + "grad_norm": 1.1195297241210938, + "learning_rate": 2.6870946452131997e-05, + "loss": 0.1790771484375, + "step": 3503 + }, + { + "epoch": 0.23685277815330538, + "grad_norm": 1.1660912036895752, + "learning_rate": 2.686892915136777e-05, + "loss": 0.1998443603515625, + "step": 3504 + }, + { + "epoch": 0.23692037312423955, + "grad_norm": 0.8915186524391174, + "learning_rate": 2.6866911276310938e-05, + "loss": 0.1549530029296875, + "step": 3505 + }, + { + "epoch": 0.23698796809517372, + "grad_norm": 0.6945031881332397, + "learning_rate": 2.686489282705914e-05, + "loss": 0.050689697265625, + "step": 3506 + }, + { + "epoch": 0.2370555630661079, + "grad_norm": 0.6340466737747192, + "learning_rate": 2.686287380371004e-05, + "loss": 0.12870025634765625, + "step": 3507 + }, + { + "epoch": 0.23712315803704204, + "grad_norm": 0.2190609872341156, + "learning_rate": 2.6860854206361332e-05, + "loss": 0.03904151916503906, + "step": 3508 + }, + { + "epoch": 0.2371907530079762, + "grad_norm": 0.7380692362785339, + "learning_rate": 2.6858834035110736e-05, + "loss": 0.1380157470703125, + "step": 3509 + }, + { + "epoch": 0.23725834797891038, + "grad_norm": 1.621742844581604, + "learning_rate": 2.6856813290055996e-05, + "loss": 0.1818695068359375, + "step": 3510 + }, + { + "epoch": 0.23732594294984452, + "grad_norm": 0.4221528470516205, + "learning_rate": 2.6854791971294894e-05, + "loss": 0.086456298828125, + "step": 3511 + }, + { + "epoch": 0.2373935379207787, + "grad_norm": 0.4968802034854889, + "learning_rate": 2.6852770078925235e-05, + "loss": 0.093017578125, + "step": 3512 + }, + { + "epoch": 0.23746113289171286, + "grad_norm": 0.5519598126411438, + "learning_rate": 2.6850747613044845e-05, + "loss": 0.13530731201171875, + "step": 3513 + }, + { + "epoch": 0.23752872786264703, + "grad_norm": 0.6117637753486633, + "learning_rate": 2.684872457375159e-05, + "loss": 0.13773345947265625, + "step": 3514 + }, + { + "epoch": 0.23759632283358117, + "grad_norm": 0.7753311991691589, + "learning_rate": 2.684670096114335e-05, + "loss": 0.137420654296875, + "step": 3515 + }, + { + "epoch": 0.23766391780451535, + "grad_norm": 0.46356499195098877, + "learning_rate": 2.6844676775318047e-05, + "loss": 0.061893463134765625, + "step": 3516 + }, + { + "epoch": 0.23773151277544952, + "grad_norm": 0.5639359354972839, + "learning_rate": 2.6842652016373616e-05, + "loss": 0.15155029296875, + "step": 3517 + }, + { + "epoch": 0.23779910774638366, + "grad_norm": 0.670030415058136, + "learning_rate": 2.6840626684408035e-05, + "loss": 0.147735595703125, + "step": 3518 + }, + { + "epoch": 0.23786670271731783, + "grad_norm": 0.32098764181137085, + "learning_rate": 2.68386007795193e-05, + "loss": 0.0405120849609375, + "step": 3519 + }, + { + "epoch": 0.237934297688252, + "grad_norm": 0.33651742339134216, + "learning_rate": 2.6836574301805434e-05, + "loss": 0.06991195678710938, + "step": 3520 + }, + { + "epoch": 0.23800189265918614, + "grad_norm": 0.8122236728668213, + "learning_rate": 2.6834547251364495e-05, + "loss": 0.1552276611328125, + "step": 3521 + }, + { + "epoch": 0.2380694876301203, + "grad_norm": 0.1715521663427353, + "learning_rate": 2.6832519628294556e-05, + "loss": 0.037384033203125, + "step": 3522 + }, + { + "epoch": 0.23813708260105448, + "grad_norm": 1.0259668827056885, + "learning_rate": 2.6830491432693736e-05, + "loss": 0.1657257080078125, + "step": 3523 + }, + { + "epoch": 0.23820467757198865, + "grad_norm": 0.6367020606994629, + "learning_rate": 2.6828462664660166e-05, + "loss": 0.1236572265625, + "step": 3524 + }, + { + "epoch": 0.2382722725429228, + "grad_norm": 0.4285097122192383, + "learning_rate": 2.6826433324292013e-05, + "loss": 0.08843231201171875, + "step": 3525 + }, + { + "epoch": 0.23833986751385697, + "grad_norm": 0.6892672181129456, + "learning_rate": 2.6824403411687467e-05, + "loss": 0.14088821411132812, + "step": 3526 + }, + { + "epoch": 0.23840746248479114, + "grad_norm": 0.9016096591949463, + "learning_rate": 2.682237292694475e-05, + "loss": 0.10465240478515625, + "step": 3527 + }, + { + "epoch": 0.23847505745572528, + "grad_norm": 0.43578121066093445, + "learning_rate": 2.6820341870162104e-05, + "loss": 0.0767822265625, + "step": 3528 + }, + { + "epoch": 0.23854265242665945, + "grad_norm": 2.1816012859344482, + "learning_rate": 2.681831024143781e-05, + "loss": 0.2608642578125, + "step": 3529 + }, + { + "epoch": 0.23861024739759362, + "grad_norm": 0.2753152847290039, + "learning_rate": 2.6816278040870172e-05, + "loss": 0.05692291259765625, + "step": 3530 + }, + { + "epoch": 0.2386778423685278, + "grad_norm": 1.114912748336792, + "learning_rate": 2.681424526855752e-05, + "loss": 0.1806640625, + "step": 3531 + }, + { + "epoch": 0.23874543733946194, + "grad_norm": 0.5365961194038391, + "learning_rate": 2.6812211924598204e-05, + "loss": 0.05835723876953125, + "step": 3532 + }, + { + "epoch": 0.2388130323103961, + "grad_norm": 0.8779134154319763, + "learning_rate": 2.681017800909062e-05, + "loss": 0.07311248779296875, + "step": 3533 + }, + { + "epoch": 0.23888062728133028, + "grad_norm": 0.7061172723770142, + "learning_rate": 2.6808143522133178e-05, + "loss": 0.1144256591796875, + "step": 3534 + }, + { + "epoch": 0.23894822225226442, + "grad_norm": 0.4523433446884155, + "learning_rate": 2.6806108463824314e-05, + "loss": 0.0813140869140625, + "step": 3535 + }, + { + "epoch": 0.2390158172231986, + "grad_norm": 0.31535449624061584, + "learning_rate": 2.6804072834262502e-05, + "loss": 0.0869293212890625, + "step": 3536 + }, + { + "epoch": 0.23908341219413276, + "grad_norm": 0.8221253156661987, + "learning_rate": 2.6802036633546235e-05, + "loss": 0.1394805908203125, + "step": 3537 + }, + { + "epoch": 0.23915100716506693, + "grad_norm": 0.3108317255973816, + "learning_rate": 2.6799999861774046e-05, + "loss": 0.06859970092773438, + "step": 3538 + }, + { + "epoch": 0.23921860213600107, + "grad_norm": 0.24838466942310333, + "learning_rate": 2.6797962519044476e-05, + "loss": 0.05666160583496094, + "step": 3539 + }, + { + "epoch": 0.23928619710693524, + "grad_norm": 0.8555039167404175, + "learning_rate": 2.6795924605456113e-05, + "loss": 0.1464080810546875, + "step": 3540 + }, + { + "epoch": 0.23935379207786942, + "grad_norm": 0.36353954672813416, + "learning_rate": 2.6793886121107554e-05, + "loss": 0.08447265625, + "step": 3541 + }, + { + "epoch": 0.23942138704880356, + "grad_norm": 0.41455116868019104, + "learning_rate": 2.6791847066097442e-05, + "loss": 0.0951080322265625, + "step": 3542 + }, + { + "epoch": 0.23948898201973773, + "grad_norm": 0.3800804913043976, + "learning_rate": 2.6789807440524436e-05, + "loss": 0.07295989990234375, + "step": 3543 + }, + { + "epoch": 0.2395565769906719, + "grad_norm": 0.5664997696876526, + "learning_rate": 2.678776724448723e-05, + "loss": 0.09637451171875, + "step": 3544 + }, + { + "epoch": 0.23962417196160607, + "grad_norm": 0.5984950065612793, + "learning_rate": 2.6785726478084533e-05, + "loss": 0.086029052734375, + "step": 3545 + }, + { + "epoch": 0.2396917669325402, + "grad_norm": 0.5531508922576904, + "learning_rate": 2.678368514141509e-05, + "loss": 0.11006927490234375, + "step": 3546 + }, + { + "epoch": 0.23975936190347438, + "grad_norm": 0.9470627903938293, + "learning_rate": 2.6781643234577683e-05, + "loss": 0.14173126220703125, + "step": 3547 + }, + { + "epoch": 0.23982695687440855, + "grad_norm": 0.7712075114250183, + "learning_rate": 2.6779600757671107e-05, + "loss": 0.1625518798828125, + "step": 3548 + }, + { + "epoch": 0.2398945518453427, + "grad_norm": 1.1059556007385254, + "learning_rate": 2.677755771079419e-05, + "loss": 0.232879638671875, + "step": 3549 + }, + { + "epoch": 0.23996214681627687, + "grad_norm": 0.5971564650535583, + "learning_rate": 2.6775514094045787e-05, + "loss": 0.1463623046875, + "step": 3550 + }, + { + "epoch": 0.24002974178721104, + "grad_norm": 0.2965995967388153, + "learning_rate": 2.6773469907524776e-05, + "loss": 0.04998588562011719, + "step": 3551 + }, + { + "epoch": 0.2400973367581452, + "grad_norm": 0.5223893523216248, + "learning_rate": 2.6771425151330077e-05, + "loss": 0.088287353515625, + "step": 3552 + }, + { + "epoch": 0.24016493172907935, + "grad_norm": 0.9074702858924866, + "learning_rate": 2.6769379825560626e-05, + "loss": 0.16900634765625, + "step": 3553 + }, + { + "epoch": 0.24023252670001352, + "grad_norm": 0.42352747917175293, + "learning_rate": 2.6767333930315382e-05, + "loss": 0.0571746826171875, + "step": 3554 + }, + { + "epoch": 0.2403001216709477, + "grad_norm": 0.5895770788192749, + "learning_rate": 2.6765287465693348e-05, + "loss": 0.08374786376953125, + "step": 3555 + }, + { + "epoch": 0.24036771664188183, + "grad_norm": 0.8892999887466431, + "learning_rate": 2.676324043179353e-05, + "loss": 0.1302642822265625, + "step": 3556 + }, + { + "epoch": 0.240435311612816, + "grad_norm": 0.5118579268455505, + "learning_rate": 2.676119282871499e-05, + "loss": 0.1294708251953125, + "step": 3557 + }, + { + "epoch": 0.24050290658375018, + "grad_norm": 0.969606339931488, + "learning_rate": 2.67591446565568e-05, + "loss": 0.138702392578125, + "step": 3558 + }, + { + "epoch": 0.24057050155468432, + "grad_norm": 0.4961149990558624, + "learning_rate": 2.675709591541806e-05, + "loss": 0.11322021484375, + "step": 3559 + }, + { + "epoch": 0.2406380965256185, + "grad_norm": 0.4969501495361328, + "learning_rate": 2.6755046605397903e-05, + "loss": 0.1092071533203125, + "step": 3560 + }, + { + "epoch": 0.24070569149655266, + "grad_norm": 0.21827299892902374, + "learning_rate": 2.675299672659549e-05, + "loss": 0.037975311279296875, + "step": 3561 + }, + { + "epoch": 0.24077328646748683, + "grad_norm": 0.4633868932723999, + "learning_rate": 2.675094627911e-05, + "loss": 0.07797622680664062, + "step": 3562 + }, + { + "epoch": 0.24084088143842097, + "grad_norm": 0.6381114721298218, + "learning_rate": 2.6748895263040654e-05, + "loss": 0.13643646240234375, + "step": 3563 + }, + { + "epoch": 0.24090847640935514, + "grad_norm": 0.9364556670188904, + "learning_rate": 2.6746843678486686e-05, + "loss": 0.16500091552734375, + "step": 3564 + }, + { + "epoch": 0.24097607138028931, + "grad_norm": 0.5313096642494202, + "learning_rate": 2.6744791525547373e-05, + "loss": 0.10243797302246094, + "step": 3565 + }, + { + "epoch": 0.24104366635122346, + "grad_norm": 0.34525078535079956, + "learning_rate": 2.6742738804322e-05, + "loss": 0.10124969482421875, + "step": 3566 + }, + { + "epoch": 0.24111126132215763, + "grad_norm": 0.4881431460380554, + "learning_rate": 2.6740685514909904e-05, + "loss": 0.10205459594726562, + "step": 3567 + }, + { + "epoch": 0.2411788562930918, + "grad_norm": 0.48383069038391113, + "learning_rate": 2.6738631657410422e-05, + "loss": 0.130584716796875, + "step": 3568 + }, + { + "epoch": 0.24124645126402597, + "grad_norm": 0.5105220079421997, + "learning_rate": 2.673657723192294e-05, + "loss": 0.1359405517578125, + "step": 3569 + }, + { + "epoch": 0.2413140462349601, + "grad_norm": 0.5310540795326233, + "learning_rate": 2.673452223854686e-05, + "loss": 0.07312774658203125, + "step": 3570 + }, + { + "epoch": 0.24138164120589428, + "grad_norm": 0.22274541854858398, + "learning_rate": 2.6732466677381625e-05, + "loss": 0.037693023681640625, + "step": 3571 + }, + { + "epoch": 0.24144923617682845, + "grad_norm": 0.6577059030532837, + "learning_rate": 2.6730410548526683e-05, + "loss": 0.114501953125, + "step": 3572 + }, + { + "epoch": 0.2415168311477626, + "grad_norm": 0.4201599061489105, + "learning_rate": 2.672835385208153e-05, + "loss": 0.09580230712890625, + "step": 3573 + }, + { + "epoch": 0.24158442611869677, + "grad_norm": 0.9671574831008911, + "learning_rate": 2.6726296588145676e-05, + "loss": 0.20037841796875, + "step": 3574 + }, + { + "epoch": 0.24165202108963094, + "grad_norm": 0.5694844722747803, + "learning_rate": 2.6724238756818678e-05, + "loss": 0.1116180419921875, + "step": 3575 + }, + { + "epoch": 0.2417196160605651, + "grad_norm": 0.6992793083190918, + "learning_rate": 2.6722180358200088e-05, + "loss": 0.08793258666992188, + "step": 3576 + }, + { + "epoch": 0.24178721103149925, + "grad_norm": 0.9056000709533691, + "learning_rate": 2.6720121392389516e-05, + "loss": 0.2017669677734375, + "step": 3577 + }, + { + "epoch": 0.24185480600243342, + "grad_norm": 0.9369120597839355, + "learning_rate": 2.6718061859486582e-05, + "loss": 0.178802490234375, + "step": 3578 + }, + { + "epoch": 0.2419224009733676, + "grad_norm": 0.3580947518348694, + "learning_rate": 2.6716001759590943e-05, + "loss": 0.080230712890625, + "step": 3579 + }, + { + "epoch": 0.24198999594430173, + "grad_norm": 0.3976242244243622, + "learning_rate": 2.6713941092802275e-05, + "loss": 0.05068206787109375, + "step": 3580 + }, + { + "epoch": 0.2420575909152359, + "grad_norm": 1.0506842136383057, + "learning_rate": 2.6711879859220295e-05, + "loss": 0.13495635986328125, + "step": 3581 + }, + { + "epoch": 0.24212518588617007, + "grad_norm": 1.28238844871521, + "learning_rate": 2.6709818058944727e-05, + "loss": 0.183135986328125, + "step": 3582 + }, + { + "epoch": 0.24219278085710425, + "grad_norm": 0.7186909317970276, + "learning_rate": 2.670775569207534e-05, + "loss": 0.1118927001953125, + "step": 3583 + }, + { + "epoch": 0.2422603758280384, + "grad_norm": 1.105986475944519, + "learning_rate": 2.6705692758711926e-05, + "loss": 0.221649169921875, + "step": 3584 + }, + { + "epoch": 0.24232797079897256, + "grad_norm": 0.4154735207557678, + "learning_rate": 2.6703629258954295e-05, + "loss": 0.06156158447265625, + "step": 3585 + }, + { + "epoch": 0.24239556576990673, + "grad_norm": 0.38070395588874817, + "learning_rate": 2.6701565192902297e-05, + "loss": 0.084075927734375, + "step": 3586 + }, + { + "epoch": 0.24246316074084087, + "grad_norm": 0.9515017867088318, + "learning_rate": 2.6699500560655805e-05, + "loss": 0.10162353515625, + "step": 3587 + }, + { + "epoch": 0.24253075571177504, + "grad_norm": 0.19447746872901917, + "learning_rate": 2.6697435362314716e-05, + "loss": 0.030260086059570312, + "step": 3588 + }, + { + "epoch": 0.2425983506827092, + "grad_norm": 0.5541613698005676, + "learning_rate": 2.669536959797896e-05, + "loss": 0.12561798095703125, + "step": 3589 + }, + { + "epoch": 0.24266594565364336, + "grad_norm": 0.2545444071292877, + "learning_rate": 2.6693303267748492e-05, + "loss": 0.039608001708984375, + "step": 3590 + }, + { + "epoch": 0.24273354062457753, + "grad_norm": 0.2248457670211792, + "learning_rate": 2.6691236371723287e-05, + "loss": 0.044780731201171875, + "step": 3591 + }, + { + "epoch": 0.2428011355955117, + "grad_norm": 0.4678989350795746, + "learning_rate": 2.6689168910003363e-05, + "loss": 0.097991943359375, + "step": 3592 + }, + { + "epoch": 0.24286873056644587, + "grad_norm": 1.2252233028411865, + "learning_rate": 2.668710088268875e-05, + "loss": 0.214935302734375, + "step": 3593 + }, + { + "epoch": 0.24293632553738, + "grad_norm": 0.3173646330833435, + "learning_rate": 2.6685032289879517e-05, + "loss": 0.040462493896484375, + "step": 3594 + }, + { + "epoch": 0.24300392050831418, + "grad_norm": 0.7450681328773499, + "learning_rate": 2.668296313167575e-05, + "loss": 0.1415557861328125, + "step": 3595 + }, + { + "epoch": 0.24307151547924835, + "grad_norm": 0.3847959637641907, + "learning_rate": 2.668089340817758e-05, + "loss": 0.052143096923828125, + "step": 3596 + }, + { + "epoch": 0.2431391104501825, + "grad_norm": 0.4839482605457306, + "learning_rate": 2.667882311948514e-05, + "loss": 0.07105255126953125, + "step": 3597 + }, + { + "epoch": 0.24320670542111666, + "grad_norm": 0.48230552673339844, + "learning_rate": 2.6676752265698603e-05, + "loss": 0.0946807861328125, + "step": 3598 + }, + { + "epoch": 0.24327430039205084, + "grad_norm": 0.46040818095207214, + "learning_rate": 2.6674680846918177e-05, + "loss": 0.07421875, + "step": 3599 + }, + { + "epoch": 0.243341895362985, + "grad_norm": 0.9938527941703796, + "learning_rate": 2.667260886324409e-05, + "loss": 0.13623809814453125, + "step": 3600 + }, + { + "epoch": 0.24340949033391915, + "grad_norm": 0.40238574147224426, + "learning_rate": 2.6670536314776593e-05, + "loss": 0.0811920166015625, + "step": 3601 + }, + { + "epoch": 0.24347708530485332, + "grad_norm": 0.5459176898002625, + "learning_rate": 2.6668463201615972e-05, + "loss": 0.11650848388671875, + "step": 3602 + }, + { + "epoch": 0.2435446802757875, + "grad_norm": 0.7117465734481812, + "learning_rate": 2.6666389523862535e-05, + "loss": 0.13250732421875, + "step": 3603 + }, + { + "epoch": 0.24361227524672163, + "grad_norm": 0.9499998688697815, + "learning_rate": 2.666431528161662e-05, + "loss": 0.21759033203125, + "step": 3604 + }, + { + "epoch": 0.2436798702176558, + "grad_norm": 0.34302571415901184, + "learning_rate": 2.6662240474978594e-05, + "loss": 0.04753875732421875, + "step": 3605 + }, + { + "epoch": 0.24374746518858997, + "grad_norm": 1.6449156999588013, + "learning_rate": 2.6660165104048846e-05, + "loss": 0.1777496337890625, + "step": 3606 + }, + { + "epoch": 0.24381506015952414, + "grad_norm": 1.2680327892303467, + "learning_rate": 2.6658089168927794e-05, + "loss": 0.2005767822265625, + "step": 3607 + }, + { + "epoch": 0.2438826551304583, + "grad_norm": 0.39749395847320557, + "learning_rate": 2.6656012669715893e-05, + "loss": 0.07164764404296875, + "step": 3608 + }, + { + "epoch": 0.24395025010139246, + "grad_norm": 0.5523221492767334, + "learning_rate": 2.6653935606513607e-05, + "loss": 0.143585205078125, + "step": 3609 + }, + { + "epoch": 0.24401784507232663, + "grad_norm": 0.46225446462631226, + "learning_rate": 2.665185797942144e-05, + "loss": 0.07758331298828125, + "step": 3610 + }, + { + "epoch": 0.24408544004326077, + "grad_norm": 0.34568941593170166, + "learning_rate": 2.6649779788539924e-05, + "loss": 0.0575408935546875, + "step": 3611 + }, + { + "epoch": 0.24415303501419494, + "grad_norm": 1.3305063247680664, + "learning_rate": 2.664770103396961e-05, + "loss": 0.19112396240234375, + "step": 3612 + }, + { + "epoch": 0.2442206299851291, + "grad_norm": 1.5081758499145508, + "learning_rate": 2.6645621715811083e-05, + "loss": 0.17456817626953125, + "step": 3613 + }, + { + "epoch": 0.24428822495606328, + "grad_norm": 0.8018778562545776, + "learning_rate": 2.664354183416496e-05, + "loss": 0.13482666015625, + "step": 3614 + }, + { + "epoch": 0.24435581992699743, + "grad_norm": 0.6471530199050903, + "learning_rate": 2.664146138913187e-05, + "loss": 0.163543701171875, + "step": 3615 + }, + { + "epoch": 0.2444234148979316, + "grad_norm": 1.334294319152832, + "learning_rate": 2.663938038081248e-05, + "loss": 0.1681365966796875, + "step": 3616 + }, + { + "epoch": 0.24449100986886577, + "grad_norm": 0.6776244044303894, + "learning_rate": 2.6637298809307483e-05, + "loss": 0.08819580078125, + "step": 3617 + }, + { + "epoch": 0.2445586048397999, + "grad_norm": 0.6026324033737183, + "learning_rate": 2.6635216674717595e-05, + "loss": 0.1242828369140625, + "step": 3618 + }, + { + "epoch": 0.24462619981073408, + "grad_norm": 0.6917544007301331, + "learning_rate": 2.6633133977143572e-05, + "loss": 0.15250396728515625, + "step": 3619 + }, + { + "epoch": 0.24469379478166825, + "grad_norm": 1.7115205526351929, + "learning_rate": 2.6631050716686177e-05, + "loss": 0.2073974609375, + "step": 3620 + }, + { + "epoch": 0.2447613897526024, + "grad_norm": 1.1506661176681519, + "learning_rate": 2.6628966893446215e-05, + "loss": 0.17493438720703125, + "step": 3621 + }, + { + "epoch": 0.24482898472353656, + "grad_norm": 0.5816487073898315, + "learning_rate": 2.662688250752452e-05, + "loss": 0.1508636474609375, + "step": 3622 + }, + { + "epoch": 0.24489657969447073, + "grad_norm": 0.3253931701183319, + "learning_rate": 2.6624797559021936e-05, + "loss": 0.07027435302734375, + "step": 3623 + }, + { + "epoch": 0.2449641746654049, + "grad_norm": 0.5687310695648193, + "learning_rate": 2.6622712048039353e-05, + "loss": 0.105133056640625, + "step": 3624 + }, + { + "epoch": 0.24503176963633905, + "grad_norm": 0.5180059671401978, + "learning_rate": 2.6620625974677687e-05, + "loss": 0.07657623291015625, + "step": 3625 + }, + { + "epoch": 0.24509936460727322, + "grad_norm": 0.692987859249115, + "learning_rate": 2.6618539339037862e-05, + "loss": 0.12538909912109375, + "step": 3626 + }, + { + "epoch": 0.2451669595782074, + "grad_norm": 0.4211004078388214, + "learning_rate": 2.661645214122085e-05, + "loss": 0.07946014404296875, + "step": 3627 + }, + { + "epoch": 0.24523455454914153, + "grad_norm": 1.0463361740112305, + "learning_rate": 2.661436438132764e-05, + "loss": 0.22894287109375, + "step": 3628 + }, + { + "epoch": 0.2453021495200757, + "grad_norm": 0.7919555902481079, + "learning_rate": 2.6612276059459253e-05, + "loss": 0.2249755859375, + "step": 3629 + }, + { + "epoch": 0.24536974449100987, + "grad_norm": 1.1699528694152832, + "learning_rate": 2.661018717571674e-05, + "loss": 0.23931884765625, + "step": 3630 + }, + { + "epoch": 0.24543733946194404, + "grad_norm": 1.4098533391952515, + "learning_rate": 2.6608097730201163e-05, + "loss": 0.2830810546875, + "step": 3631 + }, + { + "epoch": 0.24550493443287819, + "grad_norm": 0.9355670809745789, + "learning_rate": 2.660600772301363e-05, + "loss": 0.18109130859375, + "step": 3632 + }, + { + "epoch": 0.24557252940381236, + "grad_norm": 0.9506962299346924, + "learning_rate": 2.6603917154255265e-05, + "loss": 0.13639068603515625, + "step": 3633 + }, + { + "epoch": 0.24564012437474653, + "grad_norm": 0.8130632042884827, + "learning_rate": 2.660182602402722e-05, + "loss": 0.12122344970703125, + "step": 3634 + }, + { + "epoch": 0.24570771934568067, + "grad_norm": 1.0270092487335205, + "learning_rate": 2.6599734332430686e-05, + "loss": 0.15618896484375, + "step": 3635 + }, + { + "epoch": 0.24577531431661484, + "grad_norm": 0.5870614051818848, + "learning_rate": 2.659764207956687e-05, + "loss": 0.07895469665527344, + "step": 3636 + }, + { + "epoch": 0.245842909287549, + "grad_norm": 0.37000179290771484, + "learning_rate": 2.6595549265537003e-05, + "loss": 0.08172607421875, + "step": 3637 + }, + { + "epoch": 0.24591050425848318, + "grad_norm": 0.6973024010658264, + "learning_rate": 2.6593455890442348e-05, + "loss": 0.198638916015625, + "step": 3638 + }, + { + "epoch": 0.24597809922941732, + "grad_norm": 1.2107309103012085, + "learning_rate": 2.6591361954384196e-05, + "loss": 0.1456756591796875, + "step": 3639 + }, + { + "epoch": 0.2460456942003515, + "grad_norm": 0.5127694606781006, + "learning_rate": 2.6589267457463873e-05, + "loss": 0.082275390625, + "step": 3640 + }, + { + "epoch": 0.24611328917128567, + "grad_norm": 1.5616413354873657, + "learning_rate": 2.6587172399782714e-05, + "loss": 0.2725372314453125, + "step": 3641 + }, + { + "epoch": 0.2461808841422198, + "grad_norm": 0.3420071601867676, + "learning_rate": 2.6585076781442095e-05, + "loss": 0.062042236328125, + "step": 3642 + }, + { + "epoch": 0.24624847911315398, + "grad_norm": 0.5328503251075745, + "learning_rate": 2.6582980602543414e-05, + "loss": 0.0868072509765625, + "step": 3643 + }, + { + "epoch": 0.24631607408408815, + "grad_norm": 0.5270900130271912, + "learning_rate": 2.6580883863188097e-05, + "loss": 0.08586883544921875, + "step": 3644 + }, + { + "epoch": 0.24638366905502232, + "grad_norm": 2.081228494644165, + "learning_rate": 2.65787865634776e-05, + "loss": 0.14749526977539062, + "step": 3645 + }, + { + "epoch": 0.24645126402595646, + "grad_norm": 0.531340479850769, + "learning_rate": 2.65766887035134e-05, + "loss": 0.0736236572265625, + "step": 3646 + }, + { + "epoch": 0.24651885899689063, + "grad_norm": 0.6869735717773438, + "learning_rate": 2.6574590283397008e-05, + "loss": 0.08198165893554688, + "step": 3647 + }, + { + "epoch": 0.2465864539678248, + "grad_norm": 0.32130447030067444, + "learning_rate": 2.657249130322995e-05, + "loss": 0.0817413330078125, + "step": 3648 + }, + { + "epoch": 0.24665404893875895, + "grad_norm": 0.8158960342407227, + "learning_rate": 2.6570391763113803e-05, + "loss": 0.11075592041015625, + "step": 3649 + }, + { + "epoch": 0.24672164390969312, + "grad_norm": 1.0988786220550537, + "learning_rate": 2.6568291663150144e-05, + "loss": 0.16614532470703125, + "step": 3650 + }, + { + "epoch": 0.2467892388806273, + "grad_norm": 0.5454872846603394, + "learning_rate": 2.656619100344059e-05, + "loss": 0.113189697265625, + "step": 3651 + }, + { + "epoch": 0.24685683385156143, + "grad_norm": 0.23924778401851654, + "learning_rate": 2.6564089784086783e-05, + "loss": 0.042812347412109375, + "step": 3652 + }, + { + "epoch": 0.2469244288224956, + "grad_norm": 0.48704293370246887, + "learning_rate": 2.6561988005190402e-05, + "loss": 0.0613861083984375, + "step": 3653 + }, + { + "epoch": 0.24699202379342977, + "grad_norm": 1.0598740577697754, + "learning_rate": 2.6559885666853137e-05, + "loss": 0.183319091796875, + "step": 3654 + }, + { + "epoch": 0.24705961876436394, + "grad_norm": 0.6723018884658813, + "learning_rate": 2.655778276917671e-05, + "loss": 0.136627197265625, + "step": 3655 + }, + { + "epoch": 0.24712721373529808, + "grad_norm": 0.5229613184928894, + "learning_rate": 2.655567931226288e-05, + "loss": 0.06992340087890625, + "step": 3656 + }, + { + "epoch": 0.24719480870623226, + "grad_norm": 0.6422449946403503, + "learning_rate": 2.655357529621342e-05, + "loss": 0.131500244140625, + "step": 3657 + }, + { + "epoch": 0.24726240367716643, + "grad_norm": 0.750801146030426, + "learning_rate": 2.6551470721130132e-05, + "loss": 0.1093597412109375, + "step": 3658 + }, + { + "epoch": 0.24732999864810057, + "grad_norm": 0.7411785125732422, + "learning_rate": 2.6549365587114854e-05, + "loss": 0.12998199462890625, + "step": 3659 + }, + { + "epoch": 0.24739759361903474, + "grad_norm": 0.7313768863677979, + "learning_rate": 2.6547259894269447e-05, + "loss": 0.097747802734375, + "step": 3660 + }, + { + "epoch": 0.2474651885899689, + "grad_norm": 0.6525679230690002, + "learning_rate": 2.6545153642695796e-05, + "loss": 0.119232177734375, + "step": 3661 + }, + { + "epoch": 0.24753278356090308, + "grad_norm": 1.1378282308578491, + "learning_rate": 2.654304683249581e-05, + "loss": 0.1595611572265625, + "step": 3662 + }, + { + "epoch": 0.24760037853183722, + "grad_norm": 1.4095979928970337, + "learning_rate": 2.6540939463771432e-05, + "loss": 0.197174072265625, + "step": 3663 + }, + { + "epoch": 0.2476679735027714, + "grad_norm": 0.7038637399673462, + "learning_rate": 2.6538831536624634e-05, + "loss": 0.13361358642578125, + "step": 3664 + }, + { + "epoch": 0.24773556847370556, + "grad_norm": 0.45229968428611755, + "learning_rate": 2.6536723051157404e-05, + "loss": 0.096343994140625, + "step": 3665 + }, + { + "epoch": 0.2478031634446397, + "grad_norm": 0.26342687010765076, + "learning_rate": 2.6534614007471766e-05, + "loss": 0.06160736083984375, + "step": 3666 + }, + { + "epoch": 0.24787075841557388, + "grad_norm": 0.8735185861587524, + "learning_rate": 2.6532504405669772e-05, + "loss": 0.122650146484375, + "step": 3667 + }, + { + "epoch": 0.24793835338650805, + "grad_norm": 0.9811080098152161, + "learning_rate": 2.6530394245853494e-05, + "loss": 0.16396331787109375, + "step": 3668 + }, + { + "epoch": 0.24800594835744222, + "grad_norm": 0.6918298006057739, + "learning_rate": 2.6528283528125034e-05, + "loss": 0.1201934814453125, + "step": 3669 + }, + { + "epoch": 0.24807354332837636, + "grad_norm": 1.274781346321106, + "learning_rate": 2.6526172252586526e-05, + "loss": 0.31805419921875, + "step": 3670 + }, + { + "epoch": 0.24814113829931053, + "grad_norm": 0.32486727833747864, + "learning_rate": 2.6524060419340123e-05, + "loss": 0.05274200439453125, + "step": 3671 + }, + { + "epoch": 0.2482087332702447, + "grad_norm": 1.0645495653152466, + "learning_rate": 2.6521948028488007e-05, + "loss": 0.248138427734375, + "step": 3672 + }, + { + "epoch": 0.24827632824117885, + "grad_norm": 0.7949338555335999, + "learning_rate": 2.6519835080132395e-05, + "loss": 0.091796875, + "step": 3673 + }, + { + "epoch": 0.24834392321211302, + "grad_norm": 0.3912663459777832, + "learning_rate": 2.6517721574375518e-05, + "loss": 0.10298919677734375, + "step": 3674 + }, + { + "epoch": 0.2484115181830472, + "grad_norm": 0.6737515330314636, + "learning_rate": 2.651560751131964e-05, + "loss": 0.189361572265625, + "step": 3675 + }, + { + "epoch": 0.24847911315398136, + "grad_norm": 0.9900143146514893, + "learning_rate": 2.6513492891067067e-05, + "loss": 0.22802734375, + "step": 3676 + }, + { + "epoch": 0.2485467081249155, + "grad_norm": 1.0400060415267944, + "learning_rate": 2.6511377713720097e-05, + "loss": 0.12545013427734375, + "step": 3677 + }, + { + "epoch": 0.24861430309584967, + "grad_norm": 0.40717872977256775, + "learning_rate": 2.6509261979381087e-05, + "loss": 0.078948974609375, + "step": 3678 + }, + { + "epoch": 0.24868189806678384, + "grad_norm": 1.4408385753631592, + "learning_rate": 2.6507145688152408e-05, + "loss": 0.2210235595703125, + "step": 3679 + }, + { + "epoch": 0.24874949303771798, + "grad_norm": 0.3008183538913727, + "learning_rate": 2.6505028840136457e-05, + "loss": 0.0398712158203125, + "step": 3680 + }, + { + "epoch": 0.24881708800865215, + "grad_norm": 0.3188433349132538, + "learning_rate": 2.6502911435435664e-05, + "loss": 0.0554962158203125, + "step": 3681 + }, + { + "epoch": 0.24888468297958632, + "grad_norm": 0.2689463794231415, + "learning_rate": 2.6500793474152476e-05, + "loss": 0.04319000244140625, + "step": 3682 + }, + { + "epoch": 0.24895227795052047, + "grad_norm": 1.0175466537475586, + "learning_rate": 2.649867495638938e-05, + "loss": 0.1440277099609375, + "step": 3683 + }, + { + "epoch": 0.24901987292145464, + "grad_norm": 1.0543876886367798, + "learning_rate": 2.6496555882248877e-05, + "loss": 0.15035247802734375, + "step": 3684 + }, + { + "epoch": 0.2490874678923888, + "grad_norm": 0.5653760433197021, + "learning_rate": 2.6494436251833507e-05, + "loss": 0.07715606689453125, + "step": 3685 + }, + { + "epoch": 0.24915506286332298, + "grad_norm": 1.4758780002593994, + "learning_rate": 2.649231606524583e-05, + "loss": 0.16461181640625, + "step": 3686 + }, + { + "epoch": 0.24922265783425712, + "grad_norm": 0.3541787266731262, + "learning_rate": 2.649019532258843e-05, + "loss": 0.0690460205078125, + "step": 3687 + }, + { + "epoch": 0.2492902528051913, + "grad_norm": 1.8089908361434937, + "learning_rate": 2.648807402396392e-05, + "loss": 0.181121826171875, + "step": 3688 + }, + { + "epoch": 0.24935784777612546, + "grad_norm": 0.8112444877624512, + "learning_rate": 2.6485952169474947e-05, + "loss": 0.172882080078125, + "step": 3689 + }, + { + "epoch": 0.2494254427470596, + "grad_norm": 0.47007837891578674, + "learning_rate": 2.648382975922418e-05, + "loss": 0.07698440551757812, + "step": 3690 + }, + { + "epoch": 0.24949303771799378, + "grad_norm": 1.5119168758392334, + "learning_rate": 2.648170679331431e-05, + "loss": 0.17992401123046875, + "step": 3691 + }, + { + "epoch": 0.24956063268892795, + "grad_norm": 0.7255648970603943, + "learning_rate": 2.6479583271848065e-05, + "loss": 0.18170166015625, + "step": 3692 + }, + { + "epoch": 0.24962822765986212, + "grad_norm": 0.3290422558784485, + "learning_rate": 2.6477459194928187e-05, + "loss": 0.08091354370117188, + "step": 3693 + }, + { + "epoch": 0.24969582263079626, + "grad_norm": 1.1011995077133179, + "learning_rate": 2.6475334562657458e-05, + "loss": 0.216766357421875, + "step": 3694 + }, + { + "epoch": 0.24976341760173043, + "grad_norm": 0.5246971845626831, + "learning_rate": 2.6473209375138675e-05, + "loss": 0.1383514404296875, + "step": 3695 + }, + { + "epoch": 0.2498310125726646, + "grad_norm": 1.0007268190383911, + "learning_rate": 2.6471083632474675e-05, + "loss": 0.15887451171875, + "step": 3696 + }, + { + "epoch": 0.24989860754359874, + "grad_norm": 0.40868785977363586, + "learning_rate": 2.6468957334768308e-05, + "loss": 0.088897705078125, + "step": 3697 + }, + { + "epoch": 0.24996620251453291, + "grad_norm": 0.3903740346431732, + "learning_rate": 2.646683048212246e-05, + "loss": 0.04527473449707031, + "step": 3698 + }, + { + "epoch": 0.25003379748546706, + "grad_norm": 0.283000111579895, + "learning_rate": 2.6464703074640044e-05, + "loss": 0.037586212158203125, + "step": 3699 + }, + { + "epoch": 0.25010139245640123, + "grad_norm": 0.8152086734771729, + "learning_rate": 2.6462575112423994e-05, + "loss": 0.1186981201171875, + "step": 3700 + }, + { + "epoch": 0.2501689874273354, + "grad_norm": 0.9715700149536133, + "learning_rate": 2.6460446595577276e-05, + "loss": 0.201873779296875, + "step": 3701 + }, + { + "epoch": 0.25023658239826957, + "grad_norm": 0.21162161231040955, + "learning_rate": 2.645831752420288e-05, + "loss": 0.03275489807128906, + "step": 3702 + }, + { + "epoch": 0.25030417736920374, + "grad_norm": 0.5583238005638123, + "learning_rate": 2.6456187898403825e-05, + "loss": 0.1072998046875, + "step": 3703 + }, + { + "epoch": 0.2503717723401379, + "grad_norm": 0.2740752398967743, + "learning_rate": 2.645405771828315e-05, + "loss": 0.063018798828125, + "step": 3704 + }, + { + "epoch": 0.2504393673110721, + "grad_norm": 0.42045533657073975, + "learning_rate": 2.6451926983943935e-05, + "loss": 0.06555557250976562, + "step": 3705 + }, + { + "epoch": 0.2505069622820062, + "grad_norm": 0.5143939852714539, + "learning_rate": 2.6449795695489268e-05, + "loss": 0.08832168579101562, + "step": 3706 + }, + { + "epoch": 0.25057455725294037, + "grad_norm": 0.24879586696624756, + "learning_rate": 2.6447663853022286e-05, + "loss": 0.04313468933105469, + "step": 3707 + }, + { + "epoch": 0.25064215222387454, + "grad_norm": 1.260227918624878, + "learning_rate": 2.6445531456646132e-05, + "loss": 0.263427734375, + "step": 3708 + }, + { + "epoch": 0.2507097471948087, + "grad_norm": 0.4124927222728729, + "learning_rate": 2.6443398506463987e-05, + "loss": 0.104278564453125, + "step": 3709 + }, + { + "epoch": 0.2507773421657429, + "grad_norm": 1.3750262260437012, + "learning_rate": 2.6441265002579057e-05, + "loss": 0.16497802734375, + "step": 3710 + }, + { + "epoch": 0.25084493713667705, + "grad_norm": 0.2913651764392853, + "learning_rate": 2.6439130945094573e-05, + "loss": 0.0342864990234375, + "step": 3711 + }, + { + "epoch": 0.2509125321076112, + "grad_norm": 0.9813311100006104, + "learning_rate": 2.6436996334113793e-05, + "loss": 0.18292236328125, + "step": 3712 + }, + { + "epoch": 0.25098012707854533, + "grad_norm": 0.3680393397808075, + "learning_rate": 2.643486116974001e-05, + "loss": 0.0874481201171875, + "step": 3713 + }, + { + "epoch": 0.2510477220494795, + "grad_norm": 0.8434818983078003, + "learning_rate": 2.6432725452076524e-05, + "loss": 0.14654541015625, + "step": 3714 + }, + { + "epoch": 0.2511153170204137, + "grad_norm": 1.1570055484771729, + "learning_rate": 2.6430589181226687e-05, + "loss": 0.1784820556640625, + "step": 3715 + }, + { + "epoch": 0.25118291199134785, + "grad_norm": 1.0472509860992432, + "learning_rate": 2.6428452357293857e-05, + "loss": 0.163421630859375, + "step": 3716 + }, + { + "epoch": 0.251250506962282, + "grad_norm": 0.6108152270317078, + "learning_rate": 2.642631498038143e-05, + "loss": 0.16565704345703125, + "step": 3717 + }, + { + "epoch": 0.2513181019332162, + "grad_norm": 0.7674338817596436, + "learning_rate": 2.642417705059282e-05, + "loss": 0.1469268798828125, + "step": 3718 + }, + { + "epoch": 0.25138569690415036, + "grad_norm": 0.2280774563550949, + "learning_rate": 2.6422038568031485e-05, + "loss": 0.029893875122070312, + "step": 3719 + }, + { + "epoch": 0.2514532918750845, + "grad_norm": 1.14683997631073, + "learning_rate": 2.641989953280089e-05, + "loss": 0.182159423828125, + "step": 3720 + }, + { + "epoch": 0.25152088684601864, + "grad_norm": 0.9459814429283142, + "learning_rate": 2.6417759945004533e-05, + "loss": 0.10051727294921875, + "step": 3721 + }, + { + "epoch": 0.2515884818169528, + "grad_norm": 0.6952601075172424, + "learning_rate": 2.6415619804745942e-05, + "loss": 0.12823486328125, + "step": 3722 + }, + { + "epoch": 0.251656076787887, + "grad_norm": 0.542121410369873, + "learning_rate": 2.6413479112128676e-05, + "loss": 0.1144866943359375, + "step": 3723 + }, + { + "epoch": 0.25172367175882115, + "grad_norm": 0.8515180349349976, + "learning_rate": 2.641133786725631e-05, + "loss": 0.119110107421875, + "step": 3724 + }, + { + "epoch": 0.2517912667297553, + "grad_norm": 2.5353283882141113, + "learning_rate": 2.6409196070232452e-05, + "loss": 0.26873779296875, + "step": 3725 + }, + { + "epoch": 0.2518588617006895, + "grad_norm": 0.4345830976963043, + "learning_rate": 2.6407053721160736e-05, + "loss": 0.08783721923828125, + "step": 3726 + }, + { + "epoch": 0.2519264566716236, + "grad_norm": 0.7762943506240845, + "learning_rate": 2.6404910820144817e-05, + "loss": 0.147125244140625, + "step": 3727 + }, + { + "epoch": 0.2519940516425578, + "grad_norm": 0.7922349572181702, + "learning_rate": 2.640276736728839e-05, + "loss": 0.1198577880859375, + "step": 3728 + }, + { + "epoch": 0.25206164661349195, + "grad_norm": 0.6332769393920898, + "learning_rate": 2.640062336269516e-05, + "loss": 0.07003021240234375, + "step": 3729 + }, + { + "epoch": 0.2521292415844261, + "grad_norm": 0.8097879886627197, + "learning_rate": 2.6398478806468876e-05, + "loss": 0.171478271484375, + "step": 3730 + }, + { + "epoch": 0.2521968365553603, + "grad_norm": 0.572325587272644, + "learning_rate": 2.6396333698713304e-05, + "loss": 0.130126953125, + "step": 3731 + }, + { + "epoch": 0.25226443152629446, + "grad_norm": 0.6533191204071045, + "learning_rate": 2.639418803953223e-05, + "loss": 0.11919021606445312, + "step": 3732 + }, + { + "epoch": 0.2523320264972286, + "grad_norm": 0.2105036973953247, + "learning_rate": 2.639204182902948e-05, + "loss": 0.04109954833984375, + "step": 3733 + }, + { + "epoch": 0.25239962146816275, + "grad_norm": 0.6098679304122925, + "learning_rate": 2.63898950673089e-05, + "loss": 0.136688232421875, + "step": 3734 + }, + { + "epoch": 0.2524672164390969, + "grad_norm": 0.21479535102844238, + "learning_rate": 2.638774775447436e-05, + "loss": 0.038784027099609375, + "step": 3735 + }, + { + "epoch": 0.2525348114100311, + "grad_norm": 0.2782727777957916, + "learning_rate": 2.638559989062977e-05, + "loss": 0.0373077392578125, + "step": 3736 + }, + { + "epoch": 0.25260240638096526, + "grad_norm": 0.9680638313293457, + "learning_rate": 2.638345147587905e-05, + "loss": 0.1529083251953125, + "step": 3737 + }, + { + "epoch": 0.25267000135189943, + "grad_norm": 0.5008031129837036, + "learning_rate": 2.6381302510326153e-05, + "loss": 0.1331024169921875, + "step": 3738 + }, + { + "epoch": 0.2527375963228336, + "grad_norm": 0.9610068798065186, + "learning_rate": 2.6379152994075063e-05, + "loss": 0.1947021484375, + "step": 3739 + }, + { + "epoch": 0.2528051912937677, + "grad_norm": 0.5078487396240234, + "learning_rate": 2.637700292722978e-05, + "loss": 0.09142303466796875, + "step": 3740 + }, + { + "epoch": 0.2528727862647019, + "grad_norm": 1.1087740659713745, + "learning_rate": 2.637485230989435e-05, + "loss": 0.20513916015625, + "step": 3741 + }, + { + "epoch": 0.25294038123563606, + "grad_norm": 0.8357950448989868, + "learning_rate": 2.637270114217282e-05, + "loss": 0.1815032958984375, + "step": 3742 + }, + { + "epoch": 0.25300797620657023, + "grad_norm": 0.7401101589202881, + "learning_rate": 2.637054942416928e-05, + "loss": 0.10483551025390625, + "step": 3743 + }, + { + "epoch": 0.2530755711775044, + "grad_norm": 0.7411990165710449, + "learning_rate": 2.636839715598785e-05, + "loss": 0.0868377685546875, + "step": 3744 + }, + { + "epoch": 0.25314316614843857, + "grad_norm": 0.37349170446395874, + "learning_rate": 2.636624433773267e-05, + "loss": 0.09252166748046875, + "step": 3745 + }, + { + "epoch": 0.25321076111937274, + "grad_norm": 0.24268673360347748, + "learning_rate": 2.6364090969507897e-05, + "loss": 0.035427093505859375, + "step": 3746 + }, + { + "epoch": 0.25327835609030686, + "grad_norm": 0.5890434384346008, + "learning_rate": 2.6361937051417736e-05, + "loss": 0.12985992431640625, + "step": 3747 + }, + { + "epoch": 0.253345951061241, + "grad_norm": 0.24546295404434204, + "learning_rate": 2.6359782583566397e-05, + "loss": 0.03346443176269531, + "step": 3748 + }, + { + "epoch": 0.2534135460321752, + "grad_norm": 1.036742925643921, + "learning_rate": 2.6357627566058133e-05, + "loss": 0.1715240478515625, + "step": 3749 + }, + { + "epoch": 0.25348114100310937, + "grad_norm": 0.20431627333164215, + "learning_rate": 2.6355471998997217e-05, + "loss": 0.02468109130859375, + "step": 3750 + }, + { + "epoch": 0.25354873597404354, + "grad_norm": 1.312598466873169, + "learning_rate": 2.6353315882487942e-05, + "loss": 0.15953826904296875, + "step": 3751 + }, + { + "epoch": 0.2536163309449777, + "grad_norm": 0.6982278823852539, + "learning_rate": 2.635115921663464e-05, + "loss": 0.1602935791015625, + "step": 3752 + }, + { + "epoch": 0.2536839259159119, + "grad_norm": 0.7114322185516357, + "learning_rate": 2.634900200154166e-05, + "loss": 0.1678009033203125, + "step": 3753 + }, + { + "epoch": 0.253751520886846, + "grad_norm": 0.4345293343067169, + "learning_rate": 2.6346844237313394e-05, + "loss": 0.0650482177734375, + "step": 3754 + }, + { + "epoch": 0.25381911585778016, + "grad_norm": 2.2966411113739014, + "learning_rate": 2.6344685924054234e-05, + "loss": 0.2396240234375, + "step": 3755 + }, + { + "epoch": 0.25388671082871433, + "grad_norm": 1.0687940120697021, + "learning_rate": 2.6342527061868612e-05, + "loss": 0.201995849609375, + "step": 3756 + }, + { + "epoch": 0.2539543057996485, + "grad_norm": 0.27694255113601685, + "learning_rate": 2.6340367650861e-05, + "loss": 0.035129547119140625, + "step": 3757 + }, + { + "epoch": 0.2540219007705827, + "grad_norm": 0.3676881194114685, + "learning_rate": 2.633820769113587e-05, + "loss": 0.07234573364257812, + "step": 3758 + }, + { + "epoch": 0.25408949574151685, + "grad_norm": 0.3206271529197693, + "learning_rate": 2.6336047182797742e-05, + "loss": 0.06308746337890625, + "step": 3759 + }, + { + "epoch": 0.254157090712451, + "grad_norm": 0.8716768622398376, + "learning_rate": 2.6333886125951154e-05, + "loss": 0.222747802734375, + "step": 3760 + }, + { + "epoch": 0.25422468568338513, + "grad_norm": 0.20627108216285706, + "learning_rate": 2.6331724520700673e-05, + "loss": 0.03692436218261719, + "step": 3761 + }, + { + "epoch": 0.2542922806543193, + "grad_norm": 0.2353014349937439, + "learning_rate": 2.6329562367150885e-05, + "loss": 0.0287933349609375, + "step": 3762 + }, + { + "epoch": 0.2543598756252535, + "grad_norm": 1.4395908117294312, + "learning_rate": 2.6327399665406415e-05, + "loss": 0.1806793212890625, + "step": 3763 + }, + { + "epoch": 0.25442747059618764, + "grad_norm": 0.8967711329460144, + "learning_rate": 2.6325236415571906e-05, + "loss": 0.174530029296875, + "step": 3764 + }, + { + "epoch": 0.2544950655671218, + "grad_norm": 0.18488144874572754, + "learning_rate": 2.632307261775202e-05, + "loss": 0.025394439697265625, + "step": 3765 + }, + { + "epoch": 0.254562660538056, + "grad_norm": 0.47475728392601013, + "learning_rate": 2.632090827205147e-05, + "loss": 0.10291290283203125, + "step": 3766 + }, + { + "epoch": 0.25463025550899016, + "grad_norm": 0.7743923664093018, + "learning_rate": 2.6318743378574972e-05, + "loss": 0.13399124145507812, + "step": 3767 + }, + { + "epoch": 0.25469785047992427, + "grad_norm": 0.3920100927352905, + "learning_rate": 2.6316577937427282e-05, + "loss": 0.10927581787109375, + "step": 3768 + }, + { + "epoch": 0.25476544545085844, + "grad_norm": 1.0698528289794922, + "learning_rate": 2.6314411948713168e-05, + "loss": 0.1685791015625, + "step": 3769 + }, + { + "epoch": 0.2548330404217926, + "grad_norm": 1.3202927112579346, + "learning_rate": 2.6312245412537444e-05, + "loss": 0.2182159423828125, + "step": 3770 + }, + { + "epoch": 0.2549006353927268, + "grad_norm": 0.43057993054389954, + "learning_rate": 2.631007832900494e-05, + "loss": 0.11114501953125, + "step": 3771 + }, + { + "epoch": 0.25496823036366095, + "grad_norm": 0.320300430059433, + "learning_rate": 2.6307910698220507e-05, + "loss": 0.041568756103515625, + "step": 3772 + }, + { + "epoch": 0.2550358253345951, + "grad_norm": 0.2622952163219452, + "learning_rate": 2.630574252028903e-05, + "loss": 0.02162933349609375, + "step": 3773 + }, + { + "epoch": 0.2551034203055293, + "grad_norm": 0.7615636587142944, + "learning_rate": 2.630357379531542e-05, + "loss": 0.1443023681640625, + "step": 3774 + }, + { + "epoch": 0.2551710152764634, + "grad_norm": 0.3913322985172272, + "learning_rate": 2.6301404523404616e-05, + "loss": 0.06293582916259766, + "step": 3775 + }, + { + "epoch": 0.2552386102473976, + "grad_norm": 0.2891232669353485, + "learning_rate": 2.6299234704661573e-05, + "loss": 0.03734588623046875, + "step": 3776 + }, + { + "epoch": 0.25530620521833175, + "grad_norm": 0.2711588740348816, + "learning_rate": 2.6297064339191292e-05, + "loss": 0.0357208251953125, + "step": 3777 + }, + { + "epoch": 0.2553738001892659, + "grad_norm": 1.124017596244812, + "learning_rate": 2.629489342709878e-05, + "loss": 0.154510498046875, + "step": 3778 + }, + { + "epoch": 0.2554413951602001, + "grad_norm": 3.2351157665252686, + "learning_rate": 2.6292721968489084e-05, + "loss": 0.18825531005859375, + "step": 3779 + }, + { + "epoch": 0.25550899013113426, + "grad_norm": 0.6854618191719055, + "learning_rate": 2.629054996346727e-05, + "loss": 0.1346893310546875, + "step": 3780 + }, + { + "epoch": 0.25557658510206843, + "grad_norm": 0.6412948966026306, + "learning_rate": 2.628837741213843e-05, + "loss": 0.1414947509765625, + "step": 3781 + }, + { + "epoch": 0.25564418007300255, + "grad_norm": 0.7941072583198547, + "learning_rate": 2.628620431460769e-05, + "loss": 0.203887939453125, + "step": 3782 + }, + { + "epoch": 0.2557117750439367, + "grad_norm": 0.854989767074585, + "learning_rate": 2.6284030670980198e-05, + "loss": 0.1466522216796875, + "step": 3783 + }, + { + "epoch": 0.2557793700148709, + "grad_norm": 0.2896368205547333, + "learning_rate": 2.628185648136113e-05, + "loss": 0.06337738037109375, + "step": 3784 + }, + { + "epoch": 0.25584696498580506, + "grad_norm": 0.35808485746383667, + "learning_rate": 2.6279681745855685e-05, + "loss": 0.04709625244140625, + "step": 3785 + }, + { + "epoch": 0.25591455995673923, + "grad_norm": 0.5964555740356445, + "learning_rate": 2.6277506464569088e-05, + "loss": 0.08542633056640625, + "step": 3786 + }, + { + "epoch": 0.2559821549276734, + "grad_norm": 0.7469651103019714, + "learning_rate": 2.6275330637606593e-05, + "loss": 0.1004638671875, + "step": 3787 + }, + { + "epoch": 0.25604974989860757, + "grad_norm": 0.3288889527320862, + "learning_rate": 2.627315426507348e-05, + "loss": 0.07508087158203125, + "step": 3788 + }, + { + "epoch": 0.2561173448695417, + "grad_norm": 1.5954405069351196, + "learning_rate": 2.627097734707506e-05, + "loss": 0.1956024169921875, + "step": 3789 + }, + { + "epoch": 0.25618493984047586, + "grad_norm": 1.0394481420516968, + "learning_rate": 2.6268799883716662e-05, + "loss": 0.174163818359375, + "step": 3790 + }, + { + "epoch": 0.25625253481141, + "grad_norm": 1.5960415601730347, + "learning_rate": 2.626662187510364e-05, + "loss": 0.2196807861328125, + "step": 3791 + }, + { + "epoch": 0.2563201297823442, + "grad_norm": 0.48090484738349915, + "learning_rate": 2.626444332134139e-05, + "loss": 0.0714569091796875, + "step": 3792 + }, + { + "epoch": 0.25638772475327837, + "grad_norm": 1.6574479341506958, + "learning_rate": 2.626226422253532e-05, + "loss": 0.228179931640625, + "step": 3793 + }, + { + "epoch": 0.25645531972421254, + "grad_norm": 0.912492573261261, + "learning_rate": 2.6260084578790863e-05, + "loss": 0.195098876953125, + "step": 3794 + }, + { + "epoch": 0.2565229146951467, + "grad_norm": 0.6240761876106262, + "learning_rate": 2.6257904390213495e-05, + "loss": 0.17315673828125, + "step": 3795 + }, + { + "epoch": 0.2565905096660808, + "grad_norm": 0.4170895516872406, + "learning_rate": 2.6255723656908697e-05, + "loss": 0.0689697265625, + "step": 3796 + }, + { + "epoch": 0.256658104637015, + "grad_norm": 0.8506390452384949, + "learning_rate": 2.6253542378981992e-05, + "loss": 0.13636016845703125, + "step": 3797 + }, + { + "epoch": 0.25672569960794916, + "grad_norm": 0.7894283533096313, + "learning_rate": 2.625136055653892e-05, + "loss": 0.1693115234375, + "step": 3798 + }, + { + "epoch": 0.25679329457888334, + "grad_norm": 0.5212503671646118, + "learning_rate": 2.6249178189685052e-05, + "loss": 0.111602783203125, + "step": 3799 + }, + { + "epoch": 0.2568608895498175, + "grad_norm": 0.7346773147583008, + "learning_rate": 2.6246995278525983e-05, + "loss": 0.1533966064453125, + "step": 3800 + }, + { + "epoch": 0.2569284845207517, + "grad_norm": 0.6057413220405579, + "learning_rate": 2.6244811823167347e-05, + "loss": 0.186614990234375, + "step": 3801 + }, + { + "epoch": 0.2569960794916858, + "grad_norm": 0.6097314953804016, + "learning_rate": 2.6242627823714773e-05, + "loss": 0.11943817138671875, + "step": 3802 + }, + { + "epoch": 0.25706367446261996, + "grad_norm": 0.2705419361591339, + "learning_rate": 2.6240443280273957e-05, + "loss": 0.05126190185546875, + "step": 3803 + }, + { + "epoch": 0.25713126943355413, + "grad_norm": 1.2408095598220825, + "learning_rate": 2.623825819295059e-05, + "loss": 0.229644775390625, + "step": 3804 + }, + { + "epoch": 0.2571988644044883, + "grad_norm": 0.15759561955928802, + "learning_rate": 2.6236072561850396e-05, + "loss": 0.03562164306640625, + "step": 3805 + }, + { + "epoch": 0.2572664593754225, + "grad_norm": 0.38053596019744873, + "learning_rate": 2.6233886387079137e-05, + "loss": 0.072052001953125, + "step": 3806 + }, + { + "epoch": 0.25733405434635664, + "grad_norm": 0.424437940120697, + "learning_rate": 2.6231699668742594e-05, + "loss": 0.05329132080078125, + "step": 3807 + }, + { + "epoch": 0.2574016493172908, + "grad_norm": 1.74600088596344, + "learning_rate": 2.622951240694657e-05, + "loss": 0.3055419921875, + "step": 3808 + }, + { + "epoch": 0.25746924428822493, + "grad_norm": 1.2080708742141724, + "learning_rate": 2.62273246017969e-05, + "loss": 0.219970703125, + "step": 3809 + }, + { + "epoch": 0.2575368392591591, + "grad_norm": 0.4500761032104492, + "learning_rate": 2.6225136253399446e-05, + "loss": 0.0858917236328125, + "step": 3810 + }, + { + "epoch": 0.25760443423009327, + "grad_norm": 0.5917719006538391, + "learning_rate": 2.622294736186009e-05, + "loss": 0.13055419921875, + "step": 3811 + }, + { + "epoch": 0.25767202920102744, + "grad_norm": 0.2116750031709671, + "learning_rate": 2.6220757927284752e-05, + "loss": 0.03485870361328125, + "step": 3812 + }, + { + "epoch": 0.2577396241719616, + "grad_norm": 0.43287599086761475, + "learning_rate": 2.6218567949779358e-05, + "loss": 0.07208633422851562, + "step": 3813 + }, + { + "epoch": 0.2578072191428958, + "grad_norm": 0.310771107673645, + "learning_rate": 2.6216377429449877e-05, + "loss": 0.0477142333984375, + "step": 3814 + }, + { + "epoch": 0.25787481411382995, + "grad_norm": 0.20315606892108917, + "learning_rate": 2.6214186366402305e-05, + "loss": 0.03619384765625, + "step": 3815 + }, + { + "epoch": 0.25794240908476407, + "grad_norm": 0.3538917005062103, + "learning_rate": 2.6211994760742658e-05, + "loss": 0.0696868896484375, + "step": 3816 + }, + { + "epoch": 0.25801000405569824, + "grad_norm": 0.5835238099098206, + "learning_rate": 2.620980261257697e-05, + "loss": 0.07294464111328125, + "step": 3817 + }, + { + "epoch": 0.2580775990266324, + "grad_norm": 0.803997278213501, + "learning_rate": 2.620760992201133e-05, + "loss": 0.10428237915039062, + "step": 3818 + }, + { + "epoch": 0.2581451939975666, + "grad_norm": 0.389324814081192, + "learning_rate": 2.6205416689151815e-05, + "loss": 0.08899307250976562, + "step": 3819 + }, + { + "epoch": 0.25821278896850075, + "grad_norm": 0.8219342827796936, + "learning_rate": 2.620322291410456e-05, + "loss": 0.18695068359375, + "step": 3820 + }, + { + "epoch": 0.2582803839394349, + "grad_norm": 0.3371073305606842, + "learning_rate": 2.6201028596975704e-05, + "loss": 0.05283546447753906, + "step": 3821 + }, + { + "epoch": 0.2583479789103691, + "grad_norm": 0.7464929223060608, + "learning_rate": 2.6198833737871428e-05, + "loss": 0.1220703125, + "step": 3822 + }, + { + "epoch": 0.2584155738813032, + "grad_norm": 0.49479198455810547, + "learning_rate": 2.619663833689793e-05, + "loss": 0.04314422607421875, + "step": 3823 + }, + { + "epoch": 0.2584831688522374, + "grad_norm": 1.1666452884674072, + "learning_rate": 2.6194442394161438e-05, + "loss": 0.197784423828125, + "step": 3824 + }, + { + "epoch": 0.25855076382317155, + "grad_norm": 0.6350959539413452, + "learning_rate": 2.6192245909768208e-05, + "loss": 0.12204742431640625, + "step": 3825 + }, + { + "epoch": 0.2586183587941057, + "grad_norm": 1.268030047416687, + "learning_rate": 2.6190048883824514e-05, + "loss": 0.270782470703125, + "step": 3826 + }, + { + "epoch": 0.2586859537650399, + "grad_norm": 0.9495996236801147, + "learning_rate": 2.6187851316436665e-05, + "loss": 0.15041351318359375, + "step": 3827 + }, + { + "epoch": 0.25875354873597406, + "grad_norm": 0.5046117305755615, + "learning_rate": 2.6185653207710995e-05, + "loss": 0.05820655822753906, + "step": 3828 + }, + { + "epoch": 0.25882114370690823, + "grad_norm": 0.5008918642997742, + "learning_rate": 2.6183454557753857e-05, + "loss": 0.0984344482421875, + "step": 3829 + }, + { + "epoch": 0.25888873867784234, + "grad_norm": 0.8120920062065125, + "learning_rate": 2.6181255366671645e-05, + "loss": 0.1549072265625, + "step": 3830 + }, + { + "epoch": 0.2589563336487765, + "grad_norm": 0.5008509159088135, + "learning_rate": 2.6179055634570757e-05, + "loss": 0.0733642578125, + "step": 3831 + }, + { + "epoch": 0.2590239286197107, + "grad_norm": 0.3808441162109375, + "learning_rate": 2.6176855361557633e-05, + "loss": 0.047607421875, + "step": 3832 + }, + { + "epoch": 0.25909152359064486, + "grad_norm": 0.291778028011322, + "learning_rate": 2.6174654547738744e-05, + "loss": 0.06229400634765625, + "step": 3833 + }, + { + "epoch": 0.259159118561579, + "grad_norm": 0.7364521026611328, + "learning_rate": 2.617245319322057e-05, + "loss": 0.1513671875, + "step": 3834 + }, + { + "epoch": 0.2592267135325132, + "grad_norm": 0.5597672462463379, + "learning_rate": 2.6170251298109632e-05, + "loss": 0.115936279296875, + "step": 3835 + }, + { + "epoch": 0.25929430850344737, + "grad_norm": 2.1514081954956055, + "learning_rate": 2.616804886251247e-05, + "loss": 0.1934356689453125, + "step": 3836 + }, + { + "epoch": 0.2593619034743815, + "grad_norm": 0.4592786729335785, + "learning_rate": 2.616584588653565e-05, + "loss": 0.0813140869140625, + "step": 3837 + }, + { + "epoch": 0.25942949844531565, + "grad_norm": 0.7458189725875854, + "learning_rate": 2.6163642370285765e-05, + "loss": 0.1002960205078125, + "step": 3838 + }, + { + "epoch": 0.2594970934162498, + "grad_norm": 0.3021528124809265, + "learning_rate": 2.6161438313869438e-05, + "loss": 0.03882598876953125, + "step": 3839 + }, + { + "epoch": 0.259564688387184, + "grad_norm": 0.17799019813537598, + "learning_rate": 2.615923371739331e-05, + "loss": 0.040374755859375, + "step": 3840 + }, + { + "epoch": 0.25963228335811817, + "grad_norm": 0.2723516523838043, + "learning_rate": 2.615702858096406e-05, + "loss": 0.033355712890625, + "step": 3841 + }, + { + "epoch": 0.25969987832905234, + "grad_norm": 1.0488862991333008, + "learning_rate": 2.615482290468838e-05, + "loss": 0.16873931884765625, + "step": 3842 + }, + { + "epoch": 0.2597674732999865, + "grad_norm": 0.7246381640434265, + "learning_rate": 2.6152616688672997e-05, + "loss": 0.1139678955078125, + "step": 3843 + }, + { + "epoch": 0.2598350682709206, + "grad_norm": 0.2047640085220337, + "learning_rate": 2.615040993302466e-05, + "loss": 0.040477752685546875, + "step": 3844 + }, + { + "epoch": 0.2599026632418548, + "grad_norm": 0.3850698173046112, + "learning_rate": 2.6148202637850148e-05, + "loss": 0.0688018798828125, + "step": 3845 + }, + { + "epoch": 0.25997025821278896, + "grad_norm": 0.6436794996261597, + "learning_rate": 2.6145994803256262e-05, + "loss": 0.150848388671875, + "step": 3846 + }, + { + "epoch": 0.26003785318372313, + "grad_norm": 0.46409913897514343, + "learning_rate": 2.6143786429349834e-05, + "loss": 0.0876312255859375, + "step": 3847 + }, + { + "epoch": 0.2601054481546573, + "grad_norm": 0.3215232491493225, + "learning_rate": 2.6141577516237712e-05, + "loss": 0.07415771484375, + "step": 3848 + }, + { + "epoch": 0.2601730431255915, + "grad_norm": 0.6779897212982178, + "learning_rate": 2.613936806402678e-05, + "loss": 0.1472320556640625, + "step": 3849 + }, + { + "epoch": 0.26024063809652564, + "grad_norm": 0.6779241561889648, + "learning_rate": 2.6137158072823955e-05, + "loss": 0.15643310546875, + "step": 3850 + }, + { + "epoch": 0.26030823306745976, + "grad_norm": 0.8584126234054565, + "learning_rate": 2.6134947542736152e-05, + "loss": 0.201995849609375, + "step": 3851 + }, + { + "epoch": 0.26037582803839393, + "grad_norm": 0.38220587372779846, + "learning_rate": 2.6132736473870346e-05, + "loss": 0.0837249755859375, + "step": 3852 + }, + { + "epoch": 0.2604434230093281, + "grad_norm": 0.533758819103241, + "learning_rate": 2.6130524866333513e-05, + "loss": 0.101837158203125, + "step": 3853 + }, + { + "epoch": 0.26051101798026227, + "grad_norm": 1.148139476776123, + "learning_rate": 2.6128312720232665e-05, + "loss": 0.18506622314453125, + "step": 3854 + }, + { + "epoch": 0.26057861295119644, + "grad_norm": 0.5684641599655151, + "learning_rate": 2.6126100035674846e-05, + "loss": 0.09909820556640625, + "step": 3855 + }, + { + "epoch": 0.2606462079221306, + "grad_norm": 0.4055735170841217, + "learning_rate": 2.6123886812767108e-05, + "loss": 0.07303619384765625, + "step": 3856 + }, + { + "epoch": 0.2607138028930648, + "grad_norm": 1.4422879219055176, + "learning_rate": 2.6121673051616552e-05, + "loss": 0.24395751953125, + "step": 3857 + }, + { + "epoch": 0.2607813978639989, + "grad_norm": 0.6310803890228271, + "learning_rate": 2.6119458752330284e-05, + "loss": 0.13397216796875, + "step": 3858 + }, + { + "epoch": 0.26084899283493307, + "grad_norm": 0.4585469663143158, + "learning_rate": 2.6117243915015458e-05, + "loss": 0.08176422119140625, + "step": 3859 + }, + { + "epoch": 0.26091658780586724, + "grad_norm": 0.4775680899620056, + "learning_rate": 2.611502853977923e-05, + "loss": 0.07416152954101562, + "step": 3860 + }, + { + "epoch": 0.2609841827768014, + "grad_norm": 0.5292770266532898, + "learning_rate": 2.6112812626728796e-05, + "loss": 0.05709075927734375, + "step": 3861 + }, + { + "epoch": 0.2610517777477356, + "grad_norm": 0.31509533524513245, + "learning_rate": 2.611059617597138e-05, + "loss": 0.037761688232421875, + "step": 3862 + }, + { + "epoch": 0.26111937271866975, + "grad_norm": 0.8354026079177856, + "learning_rate": 2.6108379187614225e-05, + "loss": 0.13959503173828125, + "step": 3863 + }, + { + "epoch": 0.26118696768960387, + "grad_norm": 0.9779190421104431, + "learning_rate": 2.61061616617646e-05, + "loss": 0.15147781372070312, + "step": 3864 + }, + { + "epoch": 0.26125456266053804, + "grad_norm": 0.42845863103866577, + "learning_rate": 2.6103943598529808e-05, + "loss": 0.0709686279296875, + "step": 3865 + }, + { + "epoch": 0.2613221576314722, + "grad_norm": 0.804009735584259, + "learning_rate": 2.6101724998017167e-05, + "loss": 0.180694580078125, + "step": 3866 + }, + { + "epoch": 0.2613897526024064, + "grad_norm": 0.5448235273361206, + "learning_rate": 2.609950586033403e-05, + "loss": 0.13492584228515625, + "step": 3867 + }, + { + "epoch": 0.26145734757334055, + "grad_norm": 0.6507728695869446, + "learning_rate": 2.609728618558778e-05, + "loss": 0.143280029296875, + "step": 3868 + }, + { + "epoch": 0.2615249425442747, + "grad_norm": 0.6918888688087463, + "learning_rate": 2.60950659738858e-05, + "loss": 0.1228790283203125, + "step": 3869 + }, + { + "epoch": 0.2615925375152089, + "grad_norm": 0.7406148314476013, + "learning_rate": 2.6092845225335533e-05, + "loss": 0.13085174560546875, + "step": 3870 + }, + { + "epoch": 0.261660132486143, + "grad_norm": 0.3337212800979614, + "learning_rate": 2.609062394004443e-05, + "loss": 0.0512237548828125, + "step": 3871 + }, + { + "epoch": 0.2617277274570772, + "grad_norm": 1.8589630126953125, + "learning_rate": 2.608840211811997e-05, + "loss": 0.17830276489257812, + "step": 3872 + }, + { + "epoch": 0.26179532242801135, + "grad_norm": 0.6777418255805969, + "learning_rate": 2.6086179759669654e-05, + "loss": 0.12419509887695312, + "step": 3873 + }, + { + "epoch": 0.2618629173989455, + "grad_norm": 0.7294570803642273, + "learning_rate": 2.608395686480102e-05, + "loss": 0.191436767578125, + "step": 3874 + }, + { + "epoch": 0.2619305123698797, + "grad_norm": 0.8130529522895813, + "learning_rate": 2.6081733433621622e-05, + "loss": 0.1416168212890625, + "step": 3875 + }, + { + "epoch": 0.26199810734081386, + "grad_norm": 0.6595593094825745, + "learning_rate": 2.607950946623904e-05, + "loss": 0.0977325439453125, + "step": 3876 + }, + { + "epoch": 0.26206570231174803, + "grad_norm": 0.7180932760238647, + "learning_rate": 2.6077284962760895e-05, + "loss": 0.2203216552734375, + "step": 3877 + }, + { + "epoch": 0.26213329728268214, + "grad_norm": 1.0311014652252197, + "learning_rate": 2.6075059923294805e-05, + "loss": 0.192352294921875, + "step": 3878 + }, + { + "epoch": 0.2622008922536163, + "grad_norm": 0.4596746861934662, + "learning_rate": 2.6072834347948448e-05, + "loss": 0.11524200439453125, + "step": 3879 + }, + { + "epoch": 0.2622684872245505, + "grad_norm": 0.4449482262134552, + "learning_rate": 2.6070608236829503e-05, + "loss": 0.04905891418457031, + "step": 3880 + }, + { + "epoch": 0.26233608219548465, + "grad_norm": 0.5756531953811646, + "learning_rate": 2.6068381590045683e-05, + "loss": 0.1484527587890625, + "step": 3881 + }, + { + "epoch": 0.2624036771664188, + "grad_norm": 0.3451641798019409, + "learning_rate": 2.6066154407704725e-05, + "loss": 0.07042694091796875, + "step": 3882 + }, + { + "epoch": 0.262471272137353, + "grad_norm": 0.5541878342628479, + "learning_rate": 2.60639266899144e-05, + "loss": 0.0994110107421875, + "step": 3883 + }, + { + "epoch": 0.26253886710828717, + "grad_norm": 0.12705780565738678, + "learning_rate": 2.6061698436782496e-05, + "loss": 0.014059066772460938, + "step": 3884 + }, + { + "epoch": 0.2626064620792213, + "grad_norm": 0.5400370359420776, + "learning_rate": 2.6059469648416832e-05, + "loss": 0.10515022277832031, + "step": 3885 + }, + { + "epoch": 0.26267405705015545, + "grad_norm": 1.00910484790802, + "learning_rate": 2.605724032492524e-05, + "loss": 0.12786865234375, + "step": 3886 + }, + { + "epoch": 0.2627416520210896, + "grad_norm": 0.41400378942489624, + "learning_rate": 2.605501046641561e-05, + "loss": 0.082427978515625, + "step": 3887 + }, + { + "epoch": 0.2628092469920238, + "grad_norm": 0.7959262132644653, + "learning_rate": 2.6052780072995813e-05, + "loss": 0.199981689453125, + "step": 3888 + }, + { + "epoch": 0.26287684196295796, + "grad_norm": 0.6999860405921936, + "learning_rate": 2.6050549144773782e-05, + "loss": 0.1348876953125, + "step": 3889 + }, + { + "epoch": 0.26294443693389213, + "grad_norm": 1.5835314989089966, + "learning_rate": 2.6048317681857463e-05, + "loss": 0.194122314453125, + "step": 3890 + }, + { + "epoch": 0.2630120319048263, + "grad_norm": 0.33688119053840637, + "learning_rate": 2.6046085684354825e-05, + "loss": 0.04624176025390625, + "step": 3891 + }, + { + "epoch": 0.2630796268757604, + "grad_norm": 1.787337303161621, + "learning_rate": 2.6043853152373863e-05, + "loss": 0.1484527587890625, + "step": 3892 + }, + { + "epoch": 0.2631472218466946, + "grad_norm": 0.44518378376960754, + "learning_rate": 2.604162008602261e-05, + "loss": 0.10467147827148438, + "step": 3893 + }, + { + "epoch": 0.26321481681762876, + "grad_norm": 0.9667565822601318, + "learning_rate": 2.6039386485409105e-05, + "loss": 0.1587066650390625, + "step": 3894 + }, + { + "epoch": 0.26328241178856293, + "grad_norm": 0.868855893611908, + "learning_rate": 2.6037152350641432e-05, + "loss": 0.111907958984375, + "step": 3895 + }, + { + "epoch": 0.2633500067594971, + "grad_norm": 0.9086287617683411, + "learning_rate": 2.603491768182769e-05, + "loss": 0.1915130615234375, + "step": 3896 + }, + { + "epoch": 0.2634176017304313, + "grad_norm": 0.8255077004432678, + "learning_rate": 2.6032682479076004e-05, + "loss": 0.12535858154296875, + "step": 3897 + }, + { + "epoch": 0.26348519670136544, + "grad_norm": 1.179293155670166, + "learning_rate": 2.6030446742494526e-05, + "loss": 0.149871826171875, + "step": 3898 + }, + { + "epoch": 0.26355279167229956, + "grad_norm": 0.7458241581916809, + "learning_rate": 2.602821047219144e-05, + "loss": 0.13671875, + "step": 3899 + }, + { + "epoch": 0.26362038664323373, + "grad_norm": 0.4610196352005005, + "learning_rate": 2.6025973668274945e-05, + "loss": 0.09796142578125, + "step": 3900 + }, + { + "epoch": 0.2636879816141679, + "grad_norm": 0.7412941455841064, + "learning_rate": 2.6023736330853276e-05, + "loss": 0.1472625732421875, + "step": 3901 + }, + { + "epoch": 0.26375557658510207, + "grad_norm": 0.7478082776069641, + "learning_rate": 2.6021498460034686e-05, + "loss": 0.170257568359375, + "step": 3902 + }, + { + "epoch": 0.26382317155603624, + "grad_norm": 0.7044065594673157, + "learning_rate": 2.601926005592746e-05, + "loss": 0.110626220703125, + "step": 3903 + }, + { + "epoch": 0.2638907665269704, + "grad_norm": 1.1111363172531128, + "learning_rate": 2.6017021118639903e-05, + "loss": 0.232513427734375, + "step": 3904 + }, + { + "epoch": 0.2639583614979046, + "grad_norm": 0.6561105847358704, + "learning_rate": 2.6014781648280347e-05, + "loss": 0.12164306640625, + "step": 3905 + }, + { + "epoch": 0.2640259564688387, + "grad_norm": 0.6582751870155334, + "learning_rate": 2.601254164495716e-05, + "loss": 0.13147735595703125, + "step": 3906 + }, + { + "epoch": 0.26409355143977287, + "grad_norm": 0.964393675327301, + "learning_rate": 2.6010301108778722e-05, + "loss": 0.1280975341796875, + "step": 3907 + }, + { + "epoch": 0.26416114641070704, + "grad_norm": 1.6533854007720947, + "learning_rate": 2.6008060039853442e-05, + "loss": 0.19748687744140625, + "step": 3908 + }, + { + "epoch": 0.2642287413816412, + "grad_norm": 0.8420433402061462, + "learning_rate": 2.600581843828976e-05, + "loss": 0.164764404296875, + "step": 3909 + }, + { + "epoch": 0.2642963363525754, + "grad_norm": 0.7274805307388306, + "learning_rate": 2.6003576304196136e-05, + "loss": 0.07792282104492188, + "step": 3910 + }, + { + "epoch": 0.26436393132350955, + "grad_norm": 1.2346200942993164, + "learning_rate": 2.6001333637681058e-05, + "loss": 0.20684814453125, + "step": 3911 + }, + { + "epoch": 0.2644315262944437, + "grad_norm": 1.1596972942352295, + "learning_rate": 2.5999090438853048e-05, + "loss": 0.215576171875, + "step": 3912 + }, + { + "epoch": 0.26449912126537783, + "grad_norm": 0.38922905921936035, + "learning_rate": 2.5996846707820633e-05, + "loss": 0.061553955078125, + "step": 3913 + }, + { + "epoch": 0.264566716236312, + "grad_norm": 0.4237656891345978, + "learning_rate": 2.5994602444692394e-05, + "loss": 0.08594894409179688, + "step": 3914 + }, + { + "epoch": 0.2646343112072462, + "grad_norm": 1.5329910516738892, + "learning_rate": 2.5992357649576907e-05, + "loss": 0.266357421875, + "step": 3915 + }, + { + "epoch": 0.26470190617818035, + "grad_norm": 1.1055482625961304, + "learning_rate": 2.5990112322582798e-05, + "loss": 0.236968994140625, + "step": 3916 + }, + { + "epoch": 0.2647695011491145, + "grad_norm": 0.419559508562088, + "learning_rate": 2.598786646381871e-05, + "loss": 0.0568084716796875, + "step": 3917 + }, + { + "epoch": 0.2648370961200487, + "grad_norm": 1.2133435010910034, + "learning_rate": 2.5985620073393306e-05, + "loss": 0.179595947265625, + "step": 3918 + }, + { + "epoch": 0.26490469109098286, + "grad_norm": 1.6687802076339722, + "learning_rate": 2.598337315141529e-05, + "loss": 0.31390380859375, + "step": 3919 + }, + { + "epoch": 0.264972286061917, + "grad_norm": 0.7856985926628113, + "learning_rate": 2.598112569799337e-05, + "loss": 0.13527297973632812, + "step": 3920 + }, + { + "epoch": 0.26503988103285114, + "grad_norm": 0.29757562279701233, + "learning_rate": 2.59788777132363e-05, + "loss": 0.0446929931640625, + "step": 3921 + }, + { + "epoch": 0.2651074760037853, + "grad_norm": 0.4346763491630554, + "learning_rate": 2.597662919725285e-05, + "loss": 0.075225830078125, + "step": 3922 + }, + { + "epoch": 0.2651750709747195, + "grad_norm": 0.9118396639823914, + "learning_rate": 2.5974380150151815e-05, + "loss": 0.2078857421875, + "step": 3923 + }, + { + "epoch": 0.26524266594565366, + "grad_norm": 0.3806628882884979, + "learning_rate": 2.5972130572042024e-05, + "loss": 0.033355712890625, + "step": 3924 + }, + { + "epoch": 0.2653102609165878, + "grad_norm": 0.5119176506996155, + "learning_rate": 2.5969880463032314e-05, + "loss": 0.11490631103515625, + "step": 3925 + }, + { + "epoch": 0.26537785588752194, + "grad_norm": 1.1538798809051514, + "learning_rate": 2.5967629823231573e-05, + "loss": 0.2149658203125, + "step": 3926 + }, + { + "epoch": 0.2654454508584561, + "grad_norm": 1.3749502897262573, + "learning_rate": 2.596537865274869e-05, + "loss": 0.1432647705078125, + "step": 3927 + }, + { + "epoch": 0.2655130458293903, + "grad_norm": 0.8877873420715332, + "learning_rate": 2.5963126951692596e-05, + "loss": 0.148193359375, + "step": 3928 + }, + { + "epoch": 0.26558064080032445, + "grad_norm": 0.4643828272819519, + "learning_rate": 2.596087472017224e-05, + "loss": 0.0772857666015625, + "step": 3929 + }, + { + "epoch": 0.2656482357712586, + "grad_norm": 1.0778957605361938, + "learning_rate": 2.59586219582966e-05, + "loss": 0.214630126953125, + "step": 3930 + }, + { + "epoch": 0.2657158307421928, + "grad_norm": 0.5662257075309753, + "learning_rate": 2.5956368666174683e-05, + "loss": 0.1015472412109375, + "step": 3931 + }, + { + "epoch": 0.26578342571312696, + "grad_norm": 0.6104979515075684, + "learning_rate": 2.5954114843915505e-05, + "loss": 0.128265380859375, + "step": 3932 + }, + { + "epoch": 0.2658510206840611, + "grad_norm": 0.2854134142398834, + "learning_rate": 2.5951860491628133e-05, + "loss": 0.04031181335449219, + "step": 3933 + }, + { + "epoch": 0.26591861565499525, + "grad_norm": 0.711726725101471, + "learning_rate": 2.5949605609421642e-05, + "loss": 0.11166000366210938, + "step": 3934 + }, + { + "epoch": 0.2659862106259294, + "grad_norm": 1.47169828414917, + "learning_rate": 2.5947350197405136e-05, + "loss": 0.209716796875, + "step": 3935 + }, + { + "epoch": 0.2660538055968636, + "grad_norm": 0.3856250047683716, + "learning_rate": 2.5945094255687744e-05, + "loss": 0.061077117919921875, + "step": 3936 + }, + { + "epoch": 0.26612140056779776, + "grad_norm": 0.3507581651210785, + "learning_rate": 2.594283778437863e-05, + "loss": 0.0711212158203125, + "step": 3937 + }, + { + "epoch": 0.26618899553873193, + "grad_norm": 0.5152037143707275, + "learning_rate": 2.594058078358697e-05, + "loss": 0.09281158447265625, + "step": 3938 + }, + { + "epoch": 0.2662565905096661, + "grad_norm": 0.3159099519252777, + "learning_rate": 2.5938323253421973e-05, + "loss": 0.051219940185546875, + "step": 3939 + }, + { + "epoch": 0.2663241854806002, + "grad_norm": 0.3645246922969818, + "learning_rate": 2.5936065193992867e-05, + "loss": 0.043918609619140625, + "step": 3940 + }, + { + "epoch": 0.2663917804515344, + "grad_norm": 0.9506338834762573, + "learning_rate": 2.593380660540892e-05, + "loss": 0.16143798828125, + "step": 3941 + }, + { + "epoch": 0.26645937542246856, + "grad_norm": 0.5399342179298401, + "learning_rate": 2.5931547487779416e-05, + "loss": 0.1337127685546875, + "step": 3942 + }, + { + "epoch": 0.26652697039340273, + "grad_norm": 0.4837833642959595, + "learning_rate": 2.592928784121366e-05, + "loss": 0.135894775390625, + "step": 3943 + }, + { + "epoch": 0.2665945653643369, + "grad_norm": 0.389931857585907, + "learning_rate": 2.5927027665820987e-05, + "loss": 0.07683563232421875, + "step": 3944 + }, + { + "epoch": 0.26666216033527107, + "grad_norm": 0.3463909924030304, + "learning_rate": 2.5924766961710763e-05, + "loss": 0.083465576171875, + "step": 3945 + }, + { + "epoch": 0.26672975530620524, + "grad_norm": 0.19933949410915375, + "learning_rate": 2.5922505728992375e-05, + "loss": 0.04349517822265625, + "step": 3946 + }, + { + "epoch": 0.26679735027713936, + "grad_norm": 0.8259920477867126, + "learning_rate": 2.5920243967775228e-05, + "loss": 0.220947265625, + "step": 3947 + }, + { + "epoch": 0.2668649452480735, + "grad_norm": 0.24428790807724, + "learning_rate": 2.591798167816877e-05, + "loss": 0.053466796875, + "step": 3948 + }, + { + "epoch": 0.2669325402190077, + "grad_norm": 0.21258386969566345, + "learning_rate": 2.591571886028246e-05, + "loss": 0.038974761962890625, + "step": 3949 + }, + { + "epoch": 0.26700013518994187, + "grad_norm": 0.7498965859413147, + "learning_rate": 2.5913455514225783e-05, + "loss": 0.1119232177734375, + "step": 3950 + }, + { + "epoch": 0.26706773016087604, + "grad_norm": 1.1536171436309814, + "learning_rate": 2.5911191640108262e-05, + "loss": 0.225738525390625, + "step": 3951 + }, + { + "epoch": 0.2671353251318102, + "grad_norm": 0.3290102481842041, + "learning_rate": 2.5908927238039435e-05, + "loss": 0.054019927978515625, + "step": 3952 + }, + { + "epoch": 0.2672029201027444, + "grad_norm": 0.6833217144012451, + "learning_rate": 2.5906662308128865e-05, + "loss": 0.1107330322265625, + "step": 3953 + }, + { + "epoch": 0.2672705150736785, + "grad_norm": 0.19554923474788666, + "learning_rate": 2.5904396850486146e-05, + "loss": 0.04400634765625, + "step": 3954 + }, + { + "epoch": 0.26733811004461266, + "grad_norm": 0.3245750069618225, + "learning_rate": 2.590213086522089e-05, + "loss": 0.056644439697265625, + "step": 3955 + }, + { + "epoch": 0.26740570501554684, + "grad_norm": 0.3489871621131897, + "learning_rate": 2.5899864352442746e-05, + "loss": 0.0677642822265625, + "step": 3956 + }, + { + "epoch": 0.267473299986481, + "grad_norm": 0.282939076423645, + "learning_rate": 2.5897597312261375e-05, + "loss": 0.04888153076171875, + "step": 3957 + }, + { + "epoch": 0.2675408949574152, + "grad_norm": 0.49416762590408325, + "learning_rate": 2.589532974478648e-05, + "loss": 0.0950469970703125, + "step": 3958 + }, + { + "epoch": 0.26760848992834935, + "grad_norm": 0.2631983160972595, + "learning_rate": 2.589306165012777e-05, + "loss": 0.05034637451171875, + "step": 3959 + }, + { + "epoch": 0.2676760848992835, + "grad_norm": 1.6827325820922852, + "learning_rate": 2.5890793028394998e-05, + "loss": 0.22235107421875, + "step": 3960 + }, + { + "epoch": 0.26774367987021763, + "grad_norm": 0.38166719675064087, + "learning_rate": 2.588852387969793e-05, + "loss": 0.08085250854492188, + "step": 3961 + }, + { + "epoch": 0.2678112748411518, + "grad_norm": 0.9631079435348511, + "learning_rate": 2.588625420414636e-05, + "loss": 0.12293243408203125, + "step": 3962 + }, + { + "epoch": 0.267878869812086, + "grad_norm": 0.5906422734260559, + "learning_rate": 2.588398400185011e-05, + "loss": 0.09326553344726562, + "step": 3963 + }, + { + "epoch": 0.26794646478302014, + "grad_norm": 0.8203309774398804, + "learning_rate": 2.588171327291903e-05, + "loss": 0.16950225830078125, + "step": 3964 + }, + { + "epoch": 0.2680140597539543, + "grad_norm": 0.4533272385597229, + "learning_rate": 2.5879442017462987e-05, + "loss": 0.1049652099609375, + "step": 3965 + }, + { + "epoch": 0.2680816547248885, + "grad_norm": 0.6015534996986389, + "learning_rate": 2.587717023559188e-05, + "loss": 0.13309097290039062, + "step": 3966 + }, + { + "epoch": 0.26814924969582266, + "grad_norm": 0.4038609564304352, + "learning_rate": 2.5874897927415632e-05, + "loss": 0.050750732421875, + "step": 3967 + }, + { + "epoch": 0.26821684466675677, + "grad_norm": 0.9431567788124084, + "learning_rate": 2.5872625093044196e-05, + "loss": 0.16020965576171875, + "step": 3968 + }, + { + "epoch": 0.26828443963769094, + "grad_norm": 0.7167689204216003, + "learning_rate": 2.587035173258754e-05, + "loss": 0.1094818115234375, + "step": 3969 + }, + { + "epoch": 0.2683520346086251, + "grad_norm": 1.094023585319519, + "learning_rate": 2.5868077846155666e-05, + "loss": 0.181488037109375, + "step": 3970 + }, + { + "epoch": 0.2684196295795593, + "grad_norm": 0.947576105594635, + "learning_rate": 2.5865803433858597e-05, + "loss": 0.131561279296875, + "step": 3971 + }, + { + "epoch": 0.26848722455049345, + "grad_norm": 0.41954708099365234, + "learning_rate": 2.5863528495806382e-05, + "loss": 0.07305908203125, + "step": 3972 + }, + { + "epoch": 0.2685548195214276, + "grad_norm": 0.4571421444416046, + "learning_rate": 2.5861253032109102e-05, + "loss": 0.07867431640625, + "step": 3973 + }, + { + "epoch": 0.2686224144923618, + "grad_norm": 0.4611027240753174, + "learning_rate": 2.585897704287685e-05, + "loss": 0.07672882080078125, + "step": 3974 + }, + { + "epoch": 0.2686900094632959, + "grad_norm": 0.5090171694755554, + "learning_rate": 2.5856700528219764e-05, + "loss": 0.05742645263671875, + "step": 3975 + }, + { + "epoch": 0.2687576044342301, + "grad_norm": 0.510106086730957, + "learning_rate": 2.5854423488247986e-05, + "loss": 0.09912109375, + "step": 3976 + }, + { + "epoch": 0.26882519940516425, + "grad_norm": 1.0772526264190674, + "learning_rate": 2.5852145923071697e-05, + "loss": 0.11110305786132812, + "step": 3977 + }, + { + "epoch": 0.2688927943760984, + "grad_norm": 0.6543656587600708, + "learning_rate": 2.58498678328011e-05, + "loss": 0.152496337890625, + "step": 3978 + }, + { + "epoch": 0.2689603893470326, + "grad_norm": 1.7740200757980347, + "learning_rate": 2.5847589217546422e-05, + "loss": 0.22552490234375, + "step": 3979 + }, + { + "epoch": 0.26902798431796676, + "grad_norm": 0.48154768347740173, + "learning_rate": 2.5845310077417916e-05, + "loss": 0.09459304809570312, + "step": 3980 + }, + { + "epoch": 0.26909557928890093, + "grad_norm": 0.6661114692687988, + "learning_rate": 2.5843030412525864e-05, + "loss": 0.12896728515625, + "step": 3981 + }, + { + "epoch": 0.26916317425983505, + "grad_norm": 0.5207661986351013, + "learning_rate": 2.5840750222980568e-05, + "loss": 0.1217498779296875, + "step": 3982 + }, + { + "epoch": 0.2692307692307692, + "grad_norm": 1.393401861190796, + "learning_rate": 2.583846950889236e-05, + "loss": 0.146026611328125, + "step": 3983 + }, + { + "epoch": 0.2692983642017034, + "grad_norm": 0.2888062298297882, + "learning_rate": 2.583618827037159e-05, + "loss": 0.0431976318359375, + "step": 3984 + }, + { + "epoch": 0.26936595917263756, + "grad_norm": 0.9626493453979492, + "learning_rate": 2.5833906507528644e-05, + "loss": 0.1838531494140625, + "step": 3985 + }, + { + "epoch": 0.26943355414357173, + "grad_norm": 0.16509348154067993, + "learning_rate": 2.583162422047393e-05, + "loss": 0.02151966094970703, + "step": 3986 + }, + { + "epoch": 0.2695011491145059, + "grad_norm": 1.1692129373550415, + "learning_rate": 2.5829341409317866e-05, + "loss": 0.224395751953125, + "step": 3987 + }, + { + "epoch": 0.26956874408544007, + "grad_norm": 0.4883766174316406, + "learning_rate": 2.5827058074170925e-05, + "loss": 0.09598541259765625, + "step": 3988 + }, + { + "epoch": 0.2696363390563742, + "grad_norm": 0.7543252110481262, + "learning_rate": 2.5824774215143576e-05, + "loss": 0.1033782958984375, + "step": 3989 + }, + { + "epoch": 0.26970393402730836, + "grad_norm": 0.4489476680755615, + "learning_rate": 2.582248983234634e-05, + "loss": 0.07727813720703125, + "step": 3990 + }, + { + "epoch": 0.2697715289982425, + "grad_norm": 0.8506202697753906, + "learning_rate": 2.582020492588973e-05, + "loss": 0.13135147094726562, + "step": 3991 + }, + { + "epoch": 0.2698391239691767, + "grad_norm": 0.43407946825027466, + "learning_rate": 2.5817919495884327e-05, + "loss": 0.0798187255859375, + "step": 3992 + }, + { + "epoch": 0.26990671894011087, + "grad_norm": 0.990875244140625, + "learning_rate": 2.5815633542440697e-05, + "loss": 0.1702880859375, + "step": 3993 + }, + { + "epoch": 0.26997431391104504, + "grad_norm": 0.866734504699707, + "learning_rate": 2.5813347065669456e-05, + "loss": 0.1444091796875, + "step": 3994 + }, + { + "epoch": 0.27004190888197915, + "grad_norm": 0.583766758441925, + "learning_rate": 2.5811060065681237e-05, + "loss": 0.08277511596679688, + "step": 3995 + }, + { + "epoch": 0.2701095038529133, + "grad_norm": 0.5091392993927002, + "learning_rate": 2.58087725425867e-05, + "loss": 0.0635986328125, + "step": 3996 + }, + { + "epoch": 0.2701770988238475, + "grad_norm": 0.2529778480529785, + "learning_rate": 2.580648449649652e-05, + "loss": 0.03342628479003906, + "step": 3997 + }, + { + "epoch": 0.27024469379478167, + "grad_norm": 0.7811124920845032, + "learning_rate": 2.5804195927521422e-05, + "loss": 0.1346282958984375, + "step": 3998 + }, + { + "epoch": 0.27031228876571584, + "grad_norm": 0.8283956050872803, + "learning_rate": 2.5801906835772136e-05, + "loss": 0.241363525390625, + "step": 3999 + }, + { + "epoch": 0.27037988373665, + "grad_norm": 0.39715275168418884, + "learning_rate": 2.579961722135942e-05, + "loss": 0.060832977294921875, + "step": 4000 + }, + { + "epoch": 0.2704474787075842, + "grad_norm": 1.2438746690750122, + "learning_rate": 2.5797327084394058e-05, + "loss": 0.129547119140625, + "step": 4001 + }, + { + "epoch": 0.2705150736785183, + "grad_norm": 0.44025006890296936, + "learning_rate": 2.5795036424986863e-05, + "loss": 0.0702056884765625, + "step": 4002 + }, + { + "epoch": 0.27058266864945246, + "grad_norm": 0.17650839686393738, + "learning_rate": 2.5792745243248676e-05, + "loss": 0.03145599365234375, + "step": 4003 + }, + { + "epoch": 0.27065026362038663, + "grad_norm": 0.6974623203277588, + "learning_rate": 2.5790453539290354e-05, + "loss": 0.13755035400390625, + "step": 4004 + }, + { + "epoch": 0.2707178585913208, + "grad_norm": 0.9997608065605164, + "learning_rate": 2.5788161313222784e-05, + "loss": 0.14281463623046875, + "step": 4005 + }, + { + "epoch": 0.270785453562255, + "grad_norm": 0.509584903717041, + "learning_rate": 2.5785868565156878e-05, + "loss": 0.0806121826171875, + "step": 4006 + }, + { + "epoch": 0.27085304853318914, + "grad_norm": 1.1601758003234863, + "learning_rate": 2.5783575295203576e-05, + "loss": 0.17676544189453125, + "step": 4007 + }, + { + "epoch": 0.2709206435041233, + "grad_norm": 0.4600980877876282, + "learning_rate": 2.5781281503473837e-05, + "loss": 0.08648681640625, + "step": 4008 + }, + { + "epoch": 0.27098823847505743, + "grad_norm": 1.186228632926941, + "learning_rate": 2.577898719007865e-05, + "loss": 0.217132568359375, + "step": 4009 + }, + { + "epoch": 0.2710558334459916, + "grad_norm": 0.4428918957710266, + "learning_rate": 2.577669235512903e-05, + "loss": 0.1029815673828125, + "step": 4010 + }, + { + "epoch": 0.27112342841692577, + "grad_norm": 0.6410036683082581, + "learning_rate": 2.5774396998736017e-05, + "loss": 0.13817596435546875, + "step": 4011 + }, + { + "epoch": 0.27119102338785994, + "grad_norm": 1.393898606300354, + "learning_rate": 2.577210112101067e-05, + "loss": 0.24774169921875, + "step": 4012 + }, + { + "epoch": 0.2712586183587941, + "grad_norm": 0.8269925117492676, + "learning_rate": 2.5769804722064077e-05, + "loss": 0.1556396484375, + "step": 4013 + }, + { + "epoch": 0.2713262133297283, + "grad_norm": 1.1030641794204712, + "learning_rate": 2.576750780200736e-05, + "loss": 0.188385009765625, + "step": 4014 + }, + { + "epoch": 0.27139380830066245, + "grad_norm": 0.8871382474899292, + "learning_rate": 2.576521036095165e-05, + "loss": 0.1143341064453125, + "step": 4015 + }, + { + "epoch": 0.27146140327159657, + "grad_norm": 0.9257377982139587, + "learning_rate": 2.5762912399008114e-05, + "loss": 0.08435821533203125, + "step": 4016 + }, + { + "epoch": 0.27152899824253074, + "grad_norm": 0.7264589071273804, + "learning_rate": 2.5760613916287948e-05, + "loss": 0.056793212890625, + "step": 4017 + }, + { + "epoch": 0.2715965932134649, + "grad_norm": 0.8143872618675232, + "learning_rate": 2.5758314912902355e-05, + "loss": 0.1422882080078125, + "step": 4018 + }, + { + "epoch": 0.2716641881843991, + "grad_norm": 1.1571924686431885, + "learning_rate": 2.575601538896258e-05, + "loss": 0.218170166015625, + "step": 4019 + }, + { + "epoch": 0.27173178315533325, + "grad_norm": 1.1428053379058838, + "learning_rate": 2.5753715344579893e-05, + "loss": 0.1315765380859375, + "step": 4020 + }, + { + "epoch": 0.2717993781262674, + "grad_norm": 0.24413371086120605, + "learning_rate": 2.5751414779865583e-05, + "loss": 0.04390716552734375, + "step": 4021 + }, + { + "epoch": 0.2718669730972016, + "grad_norm": 0.8405241370201111, + "learning_rate": 2.574911369493096e-05, + "loss": 0.11658477783203125, + "step": 4022 + }, + { + "epoch": 0.2719345680681357, + "grad_norm": 0.5516873598098755, + "learning_rate": 2.574681208988737e-05, + "loss": 0.0555572509765625, + "step": 4023 + }, + { + "epoch": 0.2720021630390699, + "grad_norm": 0.3774873614311218, + "learning_rate": 2.5744509964846176e-05, + "loss": 0.04266357421875, + "step": 4024 + }, + { + "epoch": 0.27206975801000405, + "grad_norm": 1.8753867149353027, + "learning_rate": 2.574220731991877e-05, + "loss": 0.235107421875, + "step": 4025 + }, + { + "epoch": 0.2721373529809382, + "grad_norm": 0.9930019378662109, + "learning_rate": 2.573990415521657e-05, + "loss": 0.1297149658203125, + "step": 4026 + }, + { + "epoch": 0.2722049479518724, + "grad_norm": 1.0754505395889282, + "learning_rate": 2.573760047085102e-05, + "loss": 0.1782379150390625, + "step": 4027 + }, + { + "epoch": 0.27227254292280656, + "grad_norm": 1.8325265645980835, + "learning_rate": 2.5735296266933577e-05, + "loss": 0.258880615234375, + "step": 4028 + }, + { + "epoch": 0.27234013789374073, + "grad_norm": 0.7305588126182556, + "learning_rate": 2.5732991543575745e-05, + "loss": 0.143798828125, + "step": 4029 + }, + { + "epoch": 0.27240773286467485, + "grad_norm": 0.5934497714042664, + "learning_rate": 2.5730686300889026e-05, + "loss": 0.083526611328125, + "step": 4030 + }, + { + "epoch": 0.272475327835609, + "grad_norm": 0.9986864328384399, + "learning_rate": 2.5728380538984975e-05, + "loss": 0.157196044921875, + "step": 4031 + }, + { + "epoch": 0.2725429228065432, + "grad_norm": 0.6186337471008301, + "learning_rate": 2.5726074257975153e-05, + "loss": 0.152984619140625, + "step": 4032 + }, + { + "epoch": 0.27261051777747736, + "grad_norm": 0.7118160724639893, + "learning_rate": 2.5723767457971154e-05, + "loss": 0.12797164916992188, + "step": 4033 + }, + { + "epoch": 0.2726781127484115, + "grad_norm": 0.2699330151081085, + "learning_rate": 2.5721460139084597e-05, + "loss": 0.039318084716796875, + "step": 4034 + }, + { + "epoch": 0.2727457077193457, + "grad_norm": 1.8705378770828247, + "learning_rate": 2.571915230142712e-05, + "loss": 0.270050048828125, + "step": 4035 + }, + { + "epoch": 0.27281330269027987, + "grad_norm": 1.4516571760177612, + "learning_rate": 2.5716843945110396e-05, + "loss": 0.18621826171875, + "step": 4036 + }, + { + "epoch": 0.272880897661214, + "grad_norm": 0.3642235994338989, + "learning_rate": 2.571453507024611e-05, + "loss": 0.058429718017578125, + "step": 4037 + }, + { + "epoch": 0.27294849263214815, + "grad_norm": 0.7642188668251038, + "learning_rate": 2.5712225676945986e-05, + "loss": 0.099884033203125, + "step": 4038 + }, + { + "epoch": 0.2730160876030823, + "grad_norm": 0.8702679872512817, + "learning_rate": 2.5709915765321766e-05, + "loss": 0.1168212890625, + "step": 4039 + }, + { + "epoch": 0.2730836825740165, + "grad_norm": 0.31841006875038147, + "learning_rate": 2.5707605335485218e-05, + "loss": 0.0565338134765625, + "step": 4040 + }, + { + "epoch": 0.27315127754495067, + "grad_norm": 0.3982633650302887, + "learning_rate": 2.5705294387548134e-05, + "loss": 0.0966033935546875, + "step": 4041 + }, + { + "epoch": 0.27321887251588484, + "grad_norm": 0.9697638750076294, + "learning_rate": 2.5702982921622328e-05, + "loss": 0.1510467529296875, + "step": 4042 + }, + { + "epoch": 0.273286467486819, + "grad_norm": 0.4826951026916504, + "learning_rate": 2.570067093781965e-05, + "loss": 0.1326904296875, + "step": 4043 + }, + { + "epoch": 0.2733540624577531, + "grad_norm": 0.4806624948978424, + "learning_rate": 2.5698358436251966e-05, + "loss": 0.104644775390625, + "step": 4044 + }, + { + "epoch": 0.2734216574286873, + "grad_norm": 0.1809806078672409, + "learning_rate": 2.5696045417031168e-05, + "loss": 0.02794647216796875, + "step": 4045 + }, + { + "epoch": 0.27348925239962146, + "grad_norm": 1.3342629671096802, + "learning_rate": 2.5693731880269178e-05, + "loss": 0.19580078125, + "step": 4046 + }, + { + "epoch": 0.27355684737055563, + "grad_norm": 0.9672161340713501, + "learning_rate": 2.569141782607793e-05, + "loss": 0.18756103515625, + "step": 4047 + }, + { + "epoch": 0.2736244423414898, + "grad_norm": 1.1751830577850342, + "learning_rate": 2.5689103254569402e-05, + "loss": 0.198638916015625, + "step": 4048 + }, + { + "epoch": 0.273692037312424, + "grad_norm": 0.45778200030326843, + "learning_rate": 2.5686788165855583e-05, + "loss": 0.101654052734375, + "step": 4049 + }, + { + "epoch": 0.27375963228335815, + "grad_norm": 1.2305448055267334, + "learning_rate": 2.568447256004849e-05, + "loss": 0.1706390380859375, + "step": 4050 + }, + { + "epoch": 0.27382722725429226, + "grad_norm": 0.4841082990169525, + "learning_rate": 2.568215643726017e-05, + "loss": 0.11289215087890625, + "step": 4051 + }, + { + "epoch": 0.27389482222522643, + "grad_norm": 0.3275831639766693, + "learning_rate": 2.567983979760269e-05, + "loss": 0.052700042724609375, + "step": 4052 + }, + { + "epoch": 0.2739624171961606, + "grad_norm": 0.8357971906661987, + "learning_rate": 2.5677522641188145e-05, + "loss": 0.12595367431640625, + "step": 4053 + }, + { + "epoch": 0.27403001216709477, + "grad_norm": 0.37062835693359375, + "learning_rate": 2.5675204968128652e-05, + "loss": 0.05432891845703125, + "step": 4054 + }, + { + "epoch": 0.27409760713802894, + "grad_norm": 0.4262952506542206, + "learning_rate": 2.5672886778536352e-05, + "loss": 0.0501708984375, + "step": 4055 + }, + { + "epoch": 0.2741652021089631, + "grad_norm": 0.5352064967155457, + "learning_rate": 2.5670568072523415e-05, + "loss": 0.073516845703125, + "step": 4056 + }, + { + "epoch": 0.27423279707989723, + "grad_norm": 0.5914155840873718, + "learning_rate": 2.5668248850202037e-05, + "loss": 0.10267829895019531, + "step": 4057 + }, + { + "epoch": 0.2743003920508314, + "grad_norm": 0.7401809692382812, + "learning_rate": 2.5665929111684435e-05, + "loss": 0.12997055053710938, + "step": 4058 + }, + { + "epoch": 0.27436798702176557, + "grad_norm": 0.3513144254684448, + "learning_rate": 2.566360885708285e-05, + "loss": 0.07028579711914062, + "step": 4059 + }, + { + "epoch": 0.27443558199269974, + "grad_norm": 0.31482818722724915, + "learning_rate": 2.5661288086509553e-05, + "loss": 0.06188201904296875, + "step": 4060 + }, + { + "epoch": 0.2745031769636339, + "grad_norm": 1.4671037197113037, + "learning_rate": 2.5658966800076835e-05, + "loss": 0.22489166259765625, + "step": 4061 + }, + { + "epoch": 0.2745707719345681, + "grad_norm": 0.546591579914093, + "learning_rate": 2.5656644997897018e-05, + "loss": 0.1076812744140625, + "step": 4062 + }, + { + "epoch": 0.27463836690550225, + "grad_norm": 1.3470745086669922, + "learning_rate": 2.5654322680082445e-05, + "loss": 0.13216400146484375, + "step": 4063 + }, + { + "epoch": 0.27470596187643637, + "grad_norm": 1.0588144063949585, + "learning_rate": 2.5651999846745476e-05, + "loss": 0.1759185791015625, + "step": 4064 + }, + { + "epoch": 0.27477355684737054, + "grad_norm": 0.508919894695282, + "learning_rate": 2.5649676497998517e-05, + "loss": 0.12579345703125, + "step": 4065 + }, + { + "epoch": 0.2748411518183047, + "grad_norm": 1.0450202226638794, + "learning_rate": 2.564735263395398e-05, + "loss": 0.204193115234375, + "step": 4066 + }, + { + "epoch": 0.2749087467892389, + "grad_norm": 1.021415114402771, + "learning_rate": 2.56450282547243e-05, + "loss": 0.15682220458984375, + "step": 4067 + }, + { + "epoch": 0.27497634176017305, + "grad_norm": 0.7576168775558472, + "learning_rate": 2.5642703360421954e-05, + "loss": 0.1124267578125, + "step": 4068 + }, + { + "epoch": 0.2750439367311072, + "grad_norm": 0.6026435494422913, + "learning_rate": 2.5640377951159436e-05, + "loss": 0.076416015625, + "step": 4069 + }, + { + "epoch": 0.2751115317020414, + "grad_norm": 0.5703989267349243, + "learning_rate": 2.5638052027049265e-05, + "loss": 0.11379623413085938, + "step": 4070 + }, + { + "epoch": 0.2751791266729755, + "grad_norm": 0.41845038533210754, + "learning_rate": 2.5635725588203972e-05, + "loss": 0.06988525390625, + "step": 4071 + }, + { + "epoch": 0.2752467216439097, + "grad_norm": 0.42114996910095215, + "learning_rate": 2.5633398634736135e-05, + "loss": 0.060420989990234375, + "step": 4072 + }, + { + "epoch": 0.27531431661484385, + "grad_norm": 0.39112579822540283, + "learning_rate": 2.563107116675834e-05, + "loss": 0.07022857666015625, + "step": 4073 + }, + { + "epoch": 0.275381911585778, + "grad_norm": 0.47225549817085266, + "learning_rate": 2.5628743184383218e-05, + "loss": 0.08443069458007812, + "step": 4074 + }, + { + "epoch": 0.2754495065567122, + "grad_norm": 0.23675097525119781, + "learning_rate": 2.5626414687723395e-05, + "loss": 0.050445556640625, + "step": 4075 + }, + { + "epoch": 0.27551710152764636, + "grad_norm": 0.9026038646697998, + "learning_rate": 2.5624085676891543e-05, + "loss": 0.1275482177734375, + "step": 4076 + }, + { + "epoch": 0.27558469649858053, + "grad_norm": 1.060463786125183, + "learning_rate": 2.5621756152000356e-05, + "loss": 0.2396240234375, + "step": 4077 + }, + { + "epoch": 0.27565229146951464, + "grad_norm": 0.3100475072860718, + "learning_rate": 2.5619426113162553e-05, + "loss": 0.044017791748046875, + "step": 4078 + }, + { + "epoch": 0.2757198864404488, + "grad_norm": 0.6565757393836975, + "learning_rate": 2.5617095560490868e-05, + "loss": 0.12751007080078125, + "step": 4079 + }, + { + "epoch": 0.275787481411383, + "grad_norm": 0.5346678495407104, + "learning_rate": 2.5614764494098078e-05, + "loss": 0.079376220703125, + "step": 4080 + }, + { + "epoch": 0.27585507638231715, + "grad_norm": 0.7277989387512207, + "learning_rate": 2.5612432914096967e-05, + "loss": 0.10000801086425781, + "step": 4081 + }, + { + "epoch": 0.2759226713532513, + "grad_norm": 0.2897702753543854, + "learning_rate": 2.5610100820600353e-05, + "loss": 0.04792022705078125, + "step": 4082 + }, + { + "epoch": 0.2759902663241855, + "grad_norm": 1.4319798946380615, + "learning_rate": 2.560776821372108e-05, + "loss": 0.16100311279296875, + "step": 4083 + }, + { + "epoch": 0.27605786129511967, + "grad_norm": 1.1848002672195435, + "learning_rate": 2.560543509357201e-05, + "loss": 0.11127662658691406, + "step": 4084 + }, + { + "epoch": 0.2761254562660538, + "grad_norm": 0.7896772027015686, + "learning_rate": 2.5603101460266036e-05, + "loss": 0.12377166748046875, + "step": 4085 + }, + { + "epoch": 0.27619305123698795, + "grad_norm": 1.3649710416793823, + "learning_rate": 2.5600767313916076e-05, + "loss": 0.1762542724609375, + "step": 4086 + }, + { + "epoch": 0.2762606462079221, + "grad_norm": 1.746672511100769, + "learning_rate": 2.5598432654635064e-05, + "loss": 0.22607421875, + "step": 4087 + }, + { + "epoch": 0.2763282411788563, + "grad_norm": 0.5151990056037903, + "learning_rate": 2.559609748253597e-05, + "loss": 0.13946151733398438, + "step": 4088 + }, + { + "epoch": 0.27639583614979046, + "grad_norm": 0.6414664387702942, + "learning_rate": 2.5593761797731783e-05, + "loss": 0.166412353515625, + "step": 4089 + }, + { + "epoch": 0.27646343112072463, + "grad_norm": 0.3895827829837799, + "learning_rate": 2.559142560033552e-05, + "loss": 0.09490966796875, + "step": 4090 + }, + { + "epoch": 0.2765310260916588, + "grad_norm": 1.0665154457092285, + "learning_rate": 2.558908889046022e-05, + "loss": 0.13743209838867188, + "step": 4091 + }, + { + "epoch": 0.2765986210625929, + "grad_norm": 0.9194621443748474, + "learning_rate": 2.5586751668218945e-05, + "loss": 0.119537353515625, + "step": 4092 + }, + { + "epoch": 0.2766662160335271, + "grad_norm": 0.815998375415802, + "learning_rate": 2.5584413933724785e-05, + "loss": 0.12465667724609375, + "step": 4093 + }, + { + "epoch": 0.27673381100446126, + "grad_norm": 0.40387052297592163, + "learning_rate": 2.5582075687090855e-05, + "loss": 0.06850433349609375, + "step": 4094 + }, + { + "epoch": 0.27680140597539543, + "grad_norm": 1.2361743450164795, + "learning_rate": 2.55797369284303e-05, + "loss": 0.2027130126953125, + "step": 4095 + }, + { + "epoch": 0.2768690009463296, + "grad_norm": 1.0599344968795776, + "learning_rate": 2.5577397657856272e-05, + "loss": 0.202178955078125, + "step": 4096 + }, + { + "epoch": 0.2769365959172638, + "grad_norm": 0.8670567274093628, + "learning_rate": 2.5575057875481966e-05, + "loss": 0.12296867370605469, + "step": 4097 + }, + { + "epoch": 0.27700419088819794, + "grad_norm": 0.6307282447814941, + "learning_rate": 2.5572717581420593e-05, + "loss": 0.12725830078125, + "step": 4098 + }, + { + "epoch": 0.27707178585913206, + "grad_norm": 0.7141701579093933, + "learning_rate": 2.5570376775785397e-05, + "loss": 0.144287109375, + "step": 4099 + }, + { + "epoch": 0.27713938083006623, + "grad_norm": 0.6477282047271729, + "learning_rate": 2.5568035458689634e-05, + "loss": 0.091583251953125, + "step": 4100 + }, + { + "epoch": 0.2772069758010004, + "grad_norm": 0.5224392414093018, + "learning_rate": 2.556569363024659e-05, + "loss": 0.0941619873046875, + "step": 4101 + }, + { + "epoch": 0.27727457077193457, + "grad_norm": 0.8512950539588928, + "learning_rate": 2.5563351290569583e-05, + "loss": 0.1305694580078125, + "step": 4102 + }, + { + "epoch": 0.27734216574286874, + "grad_norm": 0.31556299328804016, + "learning_rate": 2.556100843977195e-05, + "loss": 0.0545806884765625, + "step": 4103 + }, + { + "epoch": 0.2774097607138029, + "grad_norm": 0.9698886275291443, + "learning_rate": 2.555866507796705e-05, + "loss": 0.1773681640625, + "step": 4104 + }, + { + "epoch": 0.2774773556847371, + "grad_norm": 1.3596891164779663, + "learning_rate": 2.5556321205268266e-05, + "loss": 0.1790313720703125, + "step": 4105 + }, + { + "epoch": 0.2775449506556712, + "grad_norm": 0.9064018726348877, + "learning_rate": 2.5553976821789015e-05, + "loss": 0.1928558349609375, + "step": 4106 + }, + { + "epoch": 0.27761254562660537, + "grad_norm": 1.3106489181518555, + "learning_rate": 2.5551631927642734e-05, + "loss": 0.1919097900390625, + "step": 4107 + }, + { + "epoch": 0.27768014059753954, + "grad_norm": 0.4409236013889313, + "learning_rate": 2.5549286522942876e-05, + "loss": 0.06633758544921875, + "step": 4108 + }, + { + "epoch": 0.2777477355684737, + "grad_norm": 1.1631518602371216, + "learning_rate": 2.554694060780294e-05, + "loss": 0.245758056640625, + "step": 4109 + }, + { + "epoch": 0.2778153305394079, + "grad_norm": 0.4651370644569397, + "learning_rate": 2.5544594182336415e-05, + "loss": 0.038272857666015625, + "step": 4110 + }, + { + "epoch": 0.27788292551034205, + "grad_norm": 0.4989061951637268, + "learning_rate": 2.5542247246656854e-05, + "loss": 0.066375732421875, + "step": 4111 + }, + { + "epoch": 0.2779505204812762, + "grad_norm": 0.7044951319694519, + "learning_rate": 2.5539899800877807e-05, + "loss": 0.0677642822265625, + "step": 4112 + }, + { + "epoch": 0.27801811545221033, + "grad_norm": 1.8240466117858887, + "learning_rate": 2.553755184511286e-05, + "loss": 0.16309738159179688, + "step": 4113 + }, + { + "epoch": 0.2780857104231445, + "grad_norm": 0.37712937593460083, + "learning_rate": 2.553520337947562e-05, + "loss": 0.0758819580078125, + "step": 4114 + }, + { + "epoch": 0.2781533053940787, + "grad_norm": 2.005920886993408, + "learning_rate": 2.5532854404079725e-05, + "loss": 0.211822509765625, + "step": 4115 + }, + { + "epoch": 0.27822090036501285, + "grad_norm": 0.3530423939228058, + "learning_rate": 2.5530504919038835e-05, + "loss": 0.08722686767578125, + "step": 4116 + }, + { + "epoch": 0.278288495335947, + "grad_norm": 0.3583385944366455, + "learning_rate": 2.5528154924466627e-05, + "loss": 0.05843353271484375, + "step": 4117 + }, + { + "epoch": 0.2783560903068812, + "grad_norm": 0.7903674244880676, + "learning_rate": 2.552580442047681e-05, + "loss": 0.13232421875, + "step": 4118 + }, + { + "epoch": 0.27842368527781536, + "grad_norm": 1.6218162775039673, + "learning_rate": 2.5523453407183112e-05, + "loss": 0.192138671875, + "step": 4119 + }, + { + "epoch": 0.2784912802487495, + "grad_norm": 0.9156242609024048, + "learning_rate": 2.5521101884699298e-05, + "loss": 0.14539337158203125, + "step": 4120 + }, + { + "epoch": 0.27855887521968364, + "grad_norm": 0.4104066491127014, + "learning_rate": 2.5518749853139144e-05, + "loss": 0.05599212646484375, + "step": 4121 + }, + { + "epoch": 0.2786264701906178, + "grad_norm": 0.4550076127052307, + "learning_rate": 2.5516397312616456e-05, + "loss": 0.07852935791015625, + "step": 4122 + }, + { + "epoch": 0.278694065161552, + "grad_norm": 0.716219425201416, + "learning_rate": 2.5514044263245064e-05, + "loss": 0.11733245849609375, + "step": 4123 + }, + { + "epoch": 0.27876166013248616, + "grad_norm": 0.4437854290008545, + "learning_rate": 2.5511690705138826e-05, + "loss": 0.0847015380859375, + "step": 4124 + }, + { + "epoch": 0.2788292551034203, + "grad_norm": 0.8157212138175964, + "learning_rate": 2.5509336638411614e-05, + "loss": 0.1555633544921875, + "step": 4125 + }, + { + "epoch": 0.27889685007435444, + "grad_norm": 0.6007487177848816, + "learning_rate": 2.5506982063177347e-05, + "loss": 0.145904541015625, + "step": 4126 + }, + { + "epoch": 0.2789644450452886, + "grad_norm": 0.4757401943206787, + "learning_rate": 2.550462697954994e-05, + "loss": 0.1236419677734375, + "step": 4127 + }, + { + "epoch": 0.2790320400162228, + "grad_norm": 0.6956837177276611, + "learning_rate": 2.5502271387643353e-05, + "loss": 0.13298797607421875, + "step": 4128 + }, + { + "epoch": 0.27909963498715695, + "grad_norm": 1.0160598754882812, + "learning_rate": 2.5499915287571566e-05, + "loss": 0.15245819091796875, + "step": 4129 + }, + { + "epoch": 0.2791672299580911, + "grad_norm": 0.35465991497039795, + "learning_rate": 2.549755867944858e-05, + "loss": 0.05161285400390625, + "step": 4130 + }, + { + "epoch": 0.2792348249290253, + "grad_norm": 0.47173547744750977, + "learning_rate": 2.549520156338842e-05, + "loss": 0.06607437133789062, + "step": 4131 + }, + { + "epoch": 0.27930241989995946, + "grad_norm": 1.3107095956802368, + "learning_rate": 2.5492843939505142e-05, + "loss": 0.192291259765625, + "step": 4132 + }, + { + "epoch": 0.2793700148708936, + "grad_norm": 0.614486038684845, + "learning_rate": 2.5490485807912817e-05, + "loss": 0.1094512939453125, + "step": 4133 + }, + { + "epoch": 0.27943760984182775, + "grad_norm": 0.31221455335617065, + "learning_rate": 2.548812716872555e-05, + "loss": 0.06344223022460938, + "step": 4134 + }, + { + "epoch": 0.2795052048127619, + "grad_norm": 0.8559983968734741, + "learning_rate": 2.548576802205746e-05, + "loss": 0.1351165771484375, + "step": 4135 + }, + { + "epoch": 0.2795727997836961, + "grad_norm": 0.23630933463573456, + "learning_rate": 2.548340836802271e-05, + "loss": 0.0468902587890625, + "step": 4136 + }, + { + "epoch": 0.27964039475463026, + "grad_norm": 0.6853467226028442, + "learning_rate": 2.5481048206735464e-05, + "loss": 0.199615478515625, + "step": 4137 + }, + { + "epoch": 0.27970798972556443, + "grad_norm": 0.8518983125686646, + "learning_rate": 2.5478687538309924e-05, + "loss": 0.1863861083984375, + "step": 4138 + }, + { + "epoch": 0.2797755846964986, + "grad_norm": 0.35230129957199097, + "learning_rate": 2.5476326362860315e-05, + "loss": 0.06348419189453125, + "step": 4139 + }, + { + "epoch": 0.2798431796674327, + "grad_norm": 0.3147876560688019, + "learning_rate": 2.5473964680500888e-05, + "loss": 0.073699951171875, + "step": 4140 + }, + { + "epoch": 0.2799107746383669, + "grad_norm": 0.756666362285614, + "learning_rate": 2.547160249134591e-05, + "loss": 0.1370391845703125, + "step": 4141 + }, + { + "epoch": 0.27997836960930106, + "grad_norm": 0.34605419635772705, + "learning_rate": 2.546923979550968e-05, + "loss": 0.06913948059082031, + "step": 4142 + }, + { + "epoch": 0.28004596458023523, + "grad_norm": 0.3502381145954132, + "learning_rate": 2.5466876593106526e-05, + "loss": 0.06072235107421875, + "step": 4143 + }, + { + "epoch": 0.2801135595511694, + "grad_norm": 0.4588993787765503, + "learning_rate": 2.5464512884250788e-05, + "loss": 0.0975189208984375, + "step": 4144 + }, + { + "epoch": 0.28018115452210357, + "grad_norm": 0.2175598293542862, + "learning_rate": 2.546214866905684e-05, + "loss": 0.02980804443359375, + "step": 4145 + }, + { + "epoch": 0.28024874949303774, + "grad_norm": 0.7499778270721436, + "learning_rate": 2.5459783947639072e-05, + "loss": 0.06348037719726562, + "step": 4146 + }, + { + "epoch": 0.28031634446397186, + "grad_norm": 1.0161855220794678, + "learning_rate": 2.5457418720111908e-05, + "loss": 0.12086105346679688, + "step": 4147 + }, + { + "epoch": 0.280383939434906, + "grad_norm": 0.32372570037841797, + "learning_rate": 2.5455052986589796e-05, + "loss": 0.0562286376953125, + "step": 4148 + }, + { + "epoch": 0.2804515344058402, + "grad_norm": 0.3222994804382324, + "learning_rate": 2.54526867471872e-05, + "loss": 0.035114288330078125, + "step": 4149 + }, + { + "epoch": 0.28051912937677437, + "grad_norm": 0.345588743686676, + "learning_rate": 2.5450320002018612e-05, + "loss": 0.051300048828125, + "step": 4150 + }, + { + "epoch": 0.28058672434770854, + "grad_norm": 0.25889116525650024, + "learning_rate": 2.544795275119855e-05, + "loss": 0.04476165771484375, + "step": 4151 + }, + { + "epoch": 0.2806543193186427, + "grad_norm": 0.4182625710964203, + "learning_rate": 2.5445584994841564e-05, + "loss": 0.06317138671875, + "step": 4152 + }, + { + "epoch": 0.2807219142895769, + "grad_norm": 0.4329453110694885, + "learning_rate": 2.5443216733062216e-05, + "loss": 0.06390380859375, + "step": 4153 + }, + { + "epoch": 0.280789509260511, + "grad_norm": 1.2230297327041626, + "learning_rate": 2.5440847965975097e-05, + "loss": 0.16241455078125, + "step": 4154 + }, + { + "epoch": 0.28085710423144516, + "grad_norm": 0.6869068145751953, + "learning_rate": 2.543847869369482e-05, + "loss": 0.152923583984375, + "step": 4155 + }, + { + "epoch": 0.28092469920237934, + "grad_norm": 0.9491056203842163, + "learning_rate": 2.5436108916336028e-05, + "loss": 0.187164306640625, + "step": 4156 + }, + { + "epoch": 0.2809922941733135, + "grad_norm": 0.9541608095169067, + "learning_rate": 2.5433738634013388e-05, + "loss": 0.13797378540039062, + "step": 4157 + }, + { + "epoch": 0.2810598891442477, + "grad_norm": 0.3148851692676544, + "learning_rate": 2.543136784684158e-05, + "loss": 0.03661537170410156, + "step": 4158 + }, + { + "epoch": 0.28112748411518185, + "grad_norm": 1.0960239171981812, + "learning_rate": 2.5428996554935332e-05, + "loss": 0.22113037109375, + "step": 4159 + }, + { + "epoch": 0.281195079086116, + "grad_norm": 1.2460732460021973, + "learning_rate": 2.5426624758409364e-05, + "loss": 0.1651153564453125, + "step": 4160 + }, + { + "epoch": 0.28126267405705013, + "grad_norm": 1.1631237268447876, + "learning_rate": 2.5424252457378454e-05, + "loss": 0.19488525390625, + "step": 4161 + }, + { + "epoch": 0.2813302690279843, + "grad_norm": 1.0255956649780273, + "learning_rate": 2.542187965195738e-05, + "loss": 0.12775421142578125, + "step": 4162 + }, + { + "epoch": 0.2813978639989185, + "grad_norm": 1.2024989128112793, + "learning_rate": 2.5419506342260958e-05, + "loss": 0.1722412109375, + "step": 4163 + }, + { + "epoch": 0.28146545896985264, + "grad_norm": 0.6148825287818909, + "learning_rate": 2.5417132528404016e-05, + "loss": 0.10334396362304688, + "step": 4164 + }, + { + "epoch": 0.2815330539407868, + "grad_norm": 0.8148446083068848, + "learning_rate": 2.5414758210501424e-05, + "loss": 0.1396331787109375, + "step": 4165 + }, + { + "epoch": 0.281600648911721, + "grad_norm": 0.7929608821868896, + "learning_rate": 2.541238338866806e-05, + "loss": 0.1214447021484375, + "step": 4166 + }, + { + "epoch": 0.28166824388265516, + "grad_norm": 0.47861382365226746, + "learning_rate": 2.5410008063018832e-05, + "loss": 0.051483154296875, + "step": 4167 + }, + { + "epoch": 0.28173583885358927, + "grad_norm": 0.3505609333515167, + "learning_rate": 2.540763223366868e-05, + "loss": 0.06169891357421875, + "step": 4168 + }, + { + "epoch": 0.28180343382452344, + "grad_norm": 0.40075135231018066, + "learning_rate": 2.540525590073255e-05, + "loss": 0.09406280517578125, + "step": 4169 + }, + { + "epoch": 0.2818710287954576, + "grad_norm": 0.826323390007019, + "learning_rate": 2.5402879064325433e-05, + "loss": 0.174835205078125, + "step": 4170 + }, + { + "epoch": 0.2819386237663918, + "grad_norm": 0.3387523889541626, + "learning_rate": 2.5400501724562328e-05, + "loss": 0.053984642028808594, + "step": 4171 + }, + { + "epoch": 0.28200621873732595, + "grad_norm": 0.3628734052181244, + "learning_rate": 2.5398123881558274e-05, + "loss": 0.069122314453125, + "step": 4172 + }, + { + "epoch": 0.2820738137082601, + "grad_norm": 1.665744662284851, + "learning_rate": 2.5395745535428318e-05, + "loss": 0.1923675537109375, + "step": 4173 + }, + { + "epoch": 0.2821414086791943, + "grad_norm": 0.3948701322078705, + "learning_rate": 2.539336668628754e-05, + "loss": 0.0778350830078125, + "step": 4174 + }, + { + "epoch": 0.2822090036501284, + "grad_norm": 0.6537513732910156, + "learning_rate": 2.5390987334251052e-05, + "loss": 0.08064651489257812, + "step": 4175 + }, + { + "epoch": 0.2822765986210626, + "grad_norm": 0.9017170071601868, + "learning_rate": 2.538860747943398e-05, + "loss": 0.17779541015625, + "step": 4176 + }, + { + "epoch": 0.28234419359199675, + "grad_norm": 1.9327818155288696, + "learning_rate": 2.5386227121951465e-05, + "loss": 0.319000244140625, + "step": 4177 + }, + { + "epoch": 0.2824117885629309, + "grad_norm": 1.2273763418197632, + "learning_rate": 2.538384626191869e-05, + "loss": 0.16934967041015625, + "step": 4178 + }, + { + "epoch": 0.2824793835338651, + "grad_norm": 0.54307621717453, + "learning_rate": 2.538146489945086e-05, + "loss": 0.1404571533203125, + "step": 4179 + }, + { + "epoch": 0.28254697850479926, + "grad_norm": 0.5278947353363037, + "learning_rate": 2.5379083034663196e-05, + "loss": 0.0598907470703125, + "step": 4180 + }, + { + "epoch": 0.28261457347573343, + "grad_norm": 0.9780970215797424, + "learning_rate": 2.5376700667670946e-05, + "loss": 0.17249298095703125, + "step": 4181 + }, + { + "epoch": 0.28268216844666755, + "grad_norm": 0.491477370262146, + "learning_rate": 2.5374317798589383e-05, + "loss": 0.1058349609375, + "step": 4182 + }, + { + "epoch": 0.2827497634176017, + "grad_norm": 1.0988082885742188, + "learning_rate": 2.5371934427533815e-05, + "loss": 0.13225555419921875, + "step": 4183 + }, + { + "epoch": 0.2828173583885359, + "grad_norm": 0.2919129431247711, + "learning_rate": 2.536955055461955e-05, + "loss": 0.044010162353515625, + "step": 4184 + }, + { + "epoch": 0.28288495335947006, + "grad_norm": 0.18145117163658142, + "learning_rate": 2.536716617996195e-05, + "loss": 0.032535552978515625, + "step": 4185 + }, + { + "epoch": 0.28295254833040423, + "grad_norm": 0.214182510972023, + "learning_rate": 2.5364781303676368e-05, + "loss": 0.03945159912109375, + "step": 4186 + }, + { + "epoch": 0.2830201433013384, + "grad_norm": 0.786153256893158, + "learning_rate": 2.5362395925878216e-05, + "loss": 0.1302032470703125, + "step": 4187 + }, + { + "epoch": 0.2830877382722725, + "grad_norm": 0.822124183177948, + "learning_rate": 2.5360010046682898e-05, + "loss": 0.13091278076171875, + "step": 4188 + }, + { + "epoch": 0.2831553332432067, + "grad_norm": 0.8622755408287048, + "learning_rate": 2.5357623666205876e-05, + "loss": 0.18253326416015625, + "step": 4189 + }, + { + "epoch": 0.28322292821414086, + "grad_norm": 1.021344542503357, + "learning_rate": 2.5355236784562603e-05, + "loss": 0.12474441528320312, + "step": 4190 + }, + { + "epoch": 0.283290523185075, + "grad_norm": 0.26332446932792664, + "learning_rate": 2.5352849401868574e-05, + "loss": 0.04509735107421875, + "step": 4191 + }, + { + "epoch": 0.2833581181560092, + "grad_norm": 0.6706135869026184, + "learning_rate": 2.5350461518239308e-05, + "loss": 0.13150787353515625, + "step": 4192 + }, + { + "epoch": 0.28342571312694337, + "grad_norm": 1.0608179569244385, + "learning_rate": 2.5348073133790343e-05, + "loss": 0.1374359130859375, + "step": 4193 + }, + { + "epoch": 0.28349330809787754, + "grad_norm": 0.4344444274902344, + "learning_rate": 2.534568424863725e-05, + "loss": 0.06972122192382812, + "step": 4194 + }, + { + "epoch": 0.28356090306881165, + "grad_norm": 0.7840052247047424, + "learning_rate": 2.5343294862895613e-05, + "loss": 0.1914825439453125, + "step": 4195 + }, + { + "epoch": 0.2836284980397458, + "grad_norm": 0.9338513612747192, + "learning_rate": 2.5340904976681044e-05, + "loss": 0.1350555419921875, + "step": 4196 + }, + { + "epoch": 0.28369609301068, + "grad_norm": 0.21112209558486938, + "learning_rate": 2.5338514590109185e-05, + "loss": 0.031620025634765625, + "step": 4197 + }, + { + "epoch": 0.28376368798161417, + "grad_norm": 1.7314207553863525, + "learning_rate": 2.5336123703295694e-05, + "loss": 0.218902587890625, + "step": 4198 + }, + { + "epoch": 0.28383128295254834, + "grad_norm": 0.6359741687774658, + "learning_rate": 2.533373231635626e-05, + "loss": 0.10007095336914062, + "step": 4199 + }, + { + "epoch": 0.2838988779234825, + "grad_norm": 1.0477226972579956, + "learning_rate": 2.5331340429406594e-05, + "loss": 0.1622314453125, + "step": 4200 + }, + { + "epoch": 0.2839664728944167, + "grad_norm": 0.4924392104148865, + "learning_rate": 2.532894804256242e-05, + "loss": 0.08438873291015625, + "step": 4201 + }, + { + "epoch": 0.2840340678653508, + "grad_norm": 0.5809541940689087, + "learning_rate": 2.5326555155939514e-05, + "loss": 0.108856201171875, + "step": 4202 + }, + { + "epoch": 0.28410166283628496, + "grad_norm": 0.7415660619735718, + "learning_rate": 2.5324161769653645e-05, + "loss": 0.1157379150390625, + "step": 4203 + }, + { + "epoch": 0.28416925780721913, + "grad_norm": 1.5072309970855713, + "learning_rate": 2.5321767883820627e-05, + "loss": 0.24676513671875, + "step": 4204 + }, + { + "epoch": 0.2842368527781533, + "grad_norm": 1.605965256690979, + "learning_rate": 2.5319373498556288e-05, + "loss": 0.18727874755859375, + "step": 4205 + }, + { + "epoch": 0.2843044477490875, + "grad_norm": 0.8853277564048767, + "learning_rate": 2.531697861397648e-05, + "loss": 0.17469024658203125, + "step": 4206 + }, + { + "epoch": 0.28437204272002164, + "grad_norm": 0.3878655433654785, + "learning_rate": 2.5314583230197084e-05, + "loss": 0.0933685302734375, + "step": 4207 + }, + { + "epoch": 0.2844396376909558, + "grad_norm": 0.35194528102874756, + "learning_rate": 2.531218734733401e-05, + "loss": 0.07660865783691406, + "step": 4208 + }, + { + "epoch": 0.28450723266188993, + "grad_norm": 0.5375728607177734, + "learning_rate": 2.530979096550318e-05, + "loss": 0.09765625, + "step": 4209 + }, + { + "epoch": 0.2845748276328241, + "grad_norm": 0.8421525955200195, + "learning_rate": 2.530739408482055e-05, + "loss": 0.13037109375, + "step": 4210 + }, + { + "epoch": 0.28464242260375827, + "grad_norm": 0.7147271633148193, + "learning_rate": 2.530499670540209e-05, + "loss": 0.13014984130859375, + "step": 4211 + }, + { + "epoch": 0.28471001757469244, + "grad_norm": 0.6860789060592651, + "learning_rate": 2.5302598827363804e-05, + "loss": 0.140350341796875, + "step": 4212 + }, + { + "epoch": 0.2847776125456266, + "grad_norm": 0.7229522466659546, + "learning_rate": 2.530020045082172e-05, + "loss": 0.19427490234375, + "step": 4213 + }, + { + "epoch": 0.2848452075165608, + "grad_norm": 0.47871100902557373, + "learning_rate": 2.5297801575891873e-05, + "loss": 0.1301727294921875, + "step": 4214 + }, + { + "epoch": 0.28491280248749495, + "grad_norm": 0.2807922065258026, + "learning_rate": 2.5295402202690353e-05, + "loss": 0.05442047119140625, + "step": 4215 + }, + { + "epoch": 0.28498039745842907, + "grad_norm": 0.4146929979324341, + "learning_rate": 2.5293002331333246e-05, + "loss": 0.08421707153320312, + "step": 4216 + }, + { + "epoch": 0.28504799242936324, + "grad_norm": 0.6111543774604797, + "learning_rate": 2.529060196193667e-05, + "loss": 0.163818359375, + "step": 4217 + }, + { + "epoch": 0.2851155874002974, + "grad_norm": 0.6889259815216064, + "learning_rate": 2.5288201094616778e-05, + "loss": 0.13694000244140625, + "step": 4218 + }, + { + "epoch": 0.2851831823712316, + "grad_norm": 0.1831972301006317, + "learning_rate": 2.5285799729489736e-05, + "loss": 0.0339202880859375, + "step": 4219 + }, + { + "epoch": 0.28525077734216575, + "grad_norm": 1.4543523788452148, + "learning_rate": 2.5283397866671736e-05, + "loss": 0.25341796875, + "step": 4220 + }, + { + "epoch": 0.2853183723130999, + "grad_norm": 0.6237524747848511, + "learning_rate": 2.5280995506278993e-05, + "loss": 0.1100006103515625, + "step": 4221 + }, + { + "epoch": 0.2853859672840341, + "grad_norm": 1.040971279144287, + "learning_rate": 2.5278592648427753e-05, + "loss": 0.1413116455078125, + "step": 4222 + }, + { + "epoch": 0.2854535622549682, + "grad_norm": 0.6959629654884338, + "learning_rate": 2.5276189293234275e-05, + "loss": 0.1359710693359375, + "step": 4223 + }, + { + "epoch": 0.2855211572259024, + "grad_norm": 0.8037471771240234, + "learning_rate": 2.527378544081486e-05, + "loss": 0.187103271484375, + "step": 4224 + }, + { + "epoch": 0.28558875219683655, + "grad_norm": 0.7198925614356995, + "learning_rate": 2.5271381091285806e-05, + "loss": 0.15020751953125, + "step": 4225 + }, + { + "epoch": 0.2856563471677707, + "grad_norm": 0.5300019979476929, + "learning_rate": 2.5268976244763464e-05, + "loss": 0.15334320068359375, + "step": 4226 + }, + { + "epoch": 0.2857239421387049, + "grad_norm": 0.7797665596008301, + "learning_rate": 2.5266570901364183e-05, + "loss": 0.12711334228515625, + "step": 4227 + }, + { + "epoch": 0.28579153710963906, + "grad_norm": 1.3147305250167847, + "learning_rate": 2.5264165061204357e-05, + "loss": 0.202880859375, + "step": 4228 + }, + { + "epoch": 0.28585913208057323, + "grad_norm": 0.3894413113594055, + "learning_rate": 2.5261758724400392e-05, + "loss": 0.059406280517578125, + "step": 4229 + }, + { + "epoch": 0.28592672705150735, + "grad_norm": 0.6247170567512512, + "learning_rate": 2.5259351891068727e-05, + "loss": 0.109954833984375, + "step": 4230 + }, + { + "epoch": 0.2859943220224415, + "grad_norm": 0.718635618686676, + "learning_rate": 2.5256944561325813e-05, + "loss": 0.133056640625, + "step": 4231 + }, + { + "epoch": 0.2860619169933757, + "grad_norm": 0.6102437376976013, + "learning_rate": 2.525453673528813e-05, + "loss": 0.144256591796875, + "step": 4232 + }, + { + "epoch": 0.28612951196430986, + "grad_norm": 0.3574042022228241, + "learning_rate": 2.5252128413072193e-05, + "loss": 0.07792282104492188, + "step": 4233 + }, + { + "epoch": 0.28619710693524403, + "grad_norm": 1.1296249628067017, + "learning_rate": 2.5249719594794523e-05, + "loss": 0.15408897399902344, + "step": 4234 + }, + { + "epoch": 0.2862647019061782, + "grad_norm": 0.542026937007904, + "learning_rate": 2.5247310280571677e-05, + "loss": 0.09173965454101562, + "step": 4235 + }, + { + "epoch": 0.28633229687711237, + "grad_norm": 0.49295252561569214, + "learning_rate": 2.5244900470520232e-05, + "loss": 0.05941009521484375, + "step": 4236 + }, + { + "epoch": 0.2863998918480465, + "grad_norm": 0.5311846137046814, + "learning_rate": 2.5242490164756796e-05, + "loss": 0.10573959350585938, + "step": 4237 + }, + { + "epoch": 0.28646748681898065, + "grad_norm": 1.109147548675537, + "learning_rate": 2.524007936339798e-05, + "loss": 0.126800537109375, + "step": 4238 + }, + { + "epoch": 0.2865350817899148, + "grad_norm": 0.35329189896583557, + "learning_rate": 2.5237668066560445e-05, + "loss": 0.06470489501953125, + "step": 4239 + }, + { + "epoch": 0.286602676760849, + "grad_norm": 0.337207555770874, + "learning_rate": 2.523525627436086e-05, + "loss": 0.041412353515625, + "step": 4240 + }, + { + "epoch": 0.28667027173178317, + "grad_norm": 0.2558489441871643, + "learning_rate": 2.5232843986915927e-05, + "loss": 0.0589599609375, + "step": 4241 + }, + { + "epoch": 0.28673786670271734, + "grad_norm": 0.36771613359451294, + "learning_rate": 2.5230431204342365e-05, + "loss": 0.0571746826171875, + "step": 4242 + }, + { + "epoch": 0.2868054616736515, + "grad_norm": 0.7033381462097168, + "learning_rate": 2.5228017926756914e-05, + "loss": 0.114501953125, + "step": 4243 + }, + { + "epoch": 0.2868730566445856, + "grad_norm": 0.40336519479751587, + "learning_rate": 2.5225604154276354e-05, + "loss": 0.0965576171875, + "step": 4244 + }, + { + "epoch": 0.2869406516155198, + "grad_norm": 0.24465416371822357, + "learning_rate": 2.522318988701747e-05, + "loss": 0.029605865478515625, + "step": 4245 + }, + { + "epoch": 0.28700824658645396, + "grad_norm": 1.1948267221450806, + "learning_rate": 2.5220775125097082e-05, + "loss": 0.1958465576171875, + "step": 4246 + }, + { + "epoch": 0.28707584155738813, + "grad_norm": 0.8057575225830078, + "learning_rate": 2.521835986863203e-05, + "loss": 0.13278961181640625, + "step": 4247 + }, + { + "epoch": 0.2871434365283223, + "grad_norm": 0.7069911360740662, + "learning_rate": 2.5215944117739183e-05, + "loss": 0.1456756591796875, + "step": 4248 + }, + { + "epoch": 0.2872110314992565, + "grad_norm": 0.5532438158988953, + "learning_rate": 2.5213527872535428e-05, + "loss": 0.098968505859375, + "step": 4249 + }, + { + "epoch": 0.28727862647019065, + "grad_norm": 1.127090573310852, + "learning_rate": 2.5211111133137675e-05, + "loss": 0.142852783203125, + "step": 4250 + }, + { + "epoch": 0.28734622144112476, + "grad_norm": 0.48874011635780334, + "learning_rate": 2.5208693899662864e-05, + "loss": 0.0692138671875, + "step": 4251 + }, + { + "epoch": 0.28741381641205893, + "grad_norm": 0.656795084476471, + "learning_rate": 2.5206276172227955e-05, + "loss": 0.14337158203125, + "step": 4252 + }, + { + "epoch": 0.2874814113829931, + "grad_norm": 0.36407271027565, + "learning_rate": 2.520385795094993e-05, + "loss": 0.0624542236328125, + "step": 4253 + }, + { + "epoch": 0.2875490063539273, + "grad_norm": 0.3683193027973175, + "learning_rate": 2.5201439235945804e-05, + "loss": 0.07992935180664062, + "step": 4254 + }, + { + "epoch": 0.28761660132486144, + "grad_norm": 0.9054065942764282, + "learning_rate": 2.5199020027332603e-05, + "loss": 0.10929107666015625, + "step": 4255 + }, + { + "epoch": 0.2876841962957956, + "grad_norm": 0.2900235652923584, + "learning_rate": 2.5196600325227386e-05, + "loss": 0.05712890625, + "step": 4256 + }, + { + "epoch": 0.28775179126672973, + "grad_norm": 0.2668868899345398, + "learning_rate": 2.5194180129747235e-05, + "loss": 0.05532073974609375, + "step": 4257 + }, + { + "epoch": 0.2878193862376639, + "grad_norm": 0.3518426716327667, + "learning_rate": 2.519175944100925e-05, + "loss": 0.09329605102539062, + "step": 4258 + }, + { + "epoch": 0.28788698120859807, + "grad_norm": 0.4782043695449829, + "learning_rate": 2.5189338259130564e-05, + "loss": 0.08405303955078125, + "step": 4259 + }, + { + "epoch": 0.28795457617953224, + "grad_norm": 0.35607025027275085, + "learning_rate": 2.518691658422833e-05, + "loss": 0.0734100341796875, + "step": 4260 + }, + { + "epoch": 0.2880221711504664, + "grad_norm": 0.5017089247703552, + "learning_rate": 2.518449441641971e-05, + "loss": 0.07843017578125, + "step": 4261 + }, + { + "epoch": 0.2880897661214006, + "grad_norm": 0.2590068578720093, + "learning_rate": 2.5182071755821923e-05, + "loss": 0.06134796142578125, + "step": 4262 + }, + { + "epoch": 0.28815736109233475, + "grad_norm": 0.2656378746032715, + "learning_rate": 2.5179648602552174e-05, + "loss": 0.04290771484375, + "step": 4263 + }, + { + "epoch": 0.28822495606326887, + "grad_norm": 0.3421018719673157, + "learning_rate": 2.5177224956727725e-05, + "loss": 0.040252685546875, + "step": 4264 + }, + { + "epoch": 0.28829255103420304, + "grad_norm": 0.9571665525436401, + "learning_rate": 2.517480081846584e-05, + "loss": 0.159515380859375, + "step": 4265 + }, + { + "epoch": 0.2883601460051372, + "grad_norm": 2.497046947479248, + "learning_rate": 2.5172376187883814e-05, + "loss": 0.261993408203125, + "step": 4266 + }, + { + "epoch": 0.2884277409760714, + "grad_norm": 0.3269667625427246, + "learning_rate": 2.516995106509897e-05, + "loss": 0.06217193603515625, + "step": 4267 + }, + { + "epoch": 0.28849533594700555, + "grad_norm": 0.38941237330436707, + "learning_rate": 2.516752545022864e-05, + "loss": 0.05509185791015625, + "step": 4268 + }, + { + "epoch": 0.2885629309179397, + "grad_norm": 0.22546504437923431, + "learning_rate": 2.51650993433902e-05, + "loss": 0.0545806884765625, + "step": 4269 + }, + { + "epoch": 0.2886305258888739, + "grad_norm": 0.8768539428710938, + "learning_rate": 2.516267274470104e-05, + "loss": 0.178558349609375, + "step": 4270 + }, + { + "epoch": 0.288698120859808, + "grad_norm": 1.365513801574707, + "learning_rate": 2.5160245654278572e-05, + "loss": 0.206085205078125, + "step": 4271 + }, + { + "epoch": 0.2887657158307422, + "grad_norm": 0.5318976044654846, + "learning_rate": 2.5157818072240236e-05, + "loss": 0.075347900390625, + "step": 4272 + }, + { + "epoch": 0.28883331080167635, + "grad_norm": 0.391691654920578, + "learning_rate": 2.5155389998703486e-05, + "loss": 0.07454681396484375, + "step": 4273 + }, + { + "epoch": 0.2889009057726105, + "grad_norm": 0.39156582951545715, + "learning_rate": 2.5152961433785818e-05, + "loss": 0.07397842407226562, + "step": 4274 + }, + { + "epoch": 0.2889685007435447, + "grad_norm": 1.0557615756988525, + "learning_rate": 2.515053237760473e-05, + "loss": 0.195556640625, + "step": 4275 + }, + { + "epoch": 0.28903609571447886, + "grad_norm": 0.5378817319869995, + "learning_rate": 2.514810283027776e-05, + "loss": 0.1041717529296875, + "step": 4276 + }, + { + "epoch": 0.28910369068541303, + "grad_norm": 1.0040233135223389, + "learning_rate": 2.5145672791922465e-05, + "loss": 0.156982421875, + "step": 4277 + }, + { + "epoch": 0.28917128565634714, + "grad_norm": 0.18523836135864258, + "learning_rate": 2.5143242262656425e-05, + "loss": 0.03990936279296875, + "step": 4278 + }, + { + "epoch": 0.2892388806272813, + "grad_norm": 1.1438024044036865, + "learning_rate": 2.5140811242597248e-05, + "loss": 0.13071441650390625, + "step": 4279 + }, + { + "epoch": 0.2893064755982155, + "grad_norm": 1.7256172895431519, + "learning_rate": 2.513837973186255e-05, + "loss": 0.1866912841796875, + "step": 4280 + }, + { + "epoch": 0.28937407056914966, + "grad_norm": 1.1703824996948242, + "learning_rate": 2.513594773057e-05, + "loss": 0.13296890258789062, + "step": 4281 + }, + { + "epoch": 0.2894416655400838, + "grad_norm": 0.26487138867378235, + "learning_rate": 2.513351523883726e-05, + "loss": 0.0412139892578125, + "step": 4282 + }, + { + "epoch": 0.289509260511018, + "grad_norm": 0.6067199110984802, + "learning_rate": 2.5131082256782034e-05, + "loss": 0.12369537353515625, + "step": 4283 + }, + { + "epoch": 0.28957685548195217, + "grad_norm": 0.2295897752046585, + "learning_rate": 2.512864878452204e-05, + "loss": 0.05464935302734375, + "step": 4284 + }, + { + "epoch": 0.2896444504528863, + "grad_norm": 0.31130972504615784, + "learning_rate": 2.5126214822175032e-05, + "loss": 0.03546905517578125, + "step": 4285 + }, + { + "epoch": 0.28971204542382045, + "grad_norm": 0.30244389176368713, + "learning_rate": 2.5123780369858776e-05, + "loss": 0.06114959716796875, + "step": 4286 + }, + { + "epoch": 0.2897796403947546, + "grad_norm": 0.4451507031917572, + "learning_rate": 2.5121345427691065e-05, + "loss": 0.07535552978515625, + "step": 4287 + }, + { + "epoch": 0.2898472353656888, + "grad_norm": 0.7335606217384338, + "learning_rate": 2.511890999578972e-05, + "loss": 0.16832733154296875, + "step": 4288 + }, + { + "epoch": 0.28991483033662296, + "grad_norm": 1.332485318183899, + "learning_rate": 2.511647407427258e-05, + "loss": 0.16827392578125, + "step": 4289 + }, + { + "epoch": 0.28998242530755713, + "grad_norm": 0.29134252667427063, + "learning_rate": 2.5114037663257513e-05, + "loss": 0.050380706787109375, + "step": 4290 + }, + { + "epoch": 0.2900500202784913, + "grad_norm": 0.30939164757728577, + "learning_rate": 2.5111600762862404e-05, + "loss": 0.05686187744140625, + "step": 4291 + }, + { + "epoch": 0.2901176152494254, + "grad_norm": 0.3710322678089142, + "learning_rate": 2.5109163373205165e-05, + "loss": 0.07196807861328125, + "step": 4292 + }, + { + "epoch": 0.2901852102203596, + "grad_norm": 0.481349915266037, + "learning_rate": 2.5106725494403732e-05, + "loss": 0.11978912353515625, + "step": 4293 + }, + { + "epoch": 0.29025280519129376, + "grad_norm": 0.4803808331489563, + "learning_rate": 2.510428712657607e-05, + "loss": 0.0935821533203125, + "step": 4294 + }, + { + "epoch": 0.29032040016222793, + "grad_norm": 0.6945628523826599, + "learning_rate": 2.5101848269840162e-05, + "loss": 0.171722412109375, + "step": 4295 + }, + { + "epoch": 0.2903879951331621, + "grad_norm": 0.3674992322921753, + "learning_rate": 2.5099408924314005e-05, + "loss": 0.074798583984375, + "step": 4296 + }, + { + "epoch": 0.2904555901040963, + "grad_norm": 0.6817699074745178, + "learning_rate": 2.509696909011564e-05, + "loss": 0.114715576171875, + "step": 4297 + }, + { + "epoch": 0.29052318507503044, + "grad_norm": 1.1519572734832764, + "learning_rate": 2.509452876736312e-05, + "loss": 0.275970458984375, + "step": 4298 + }, + { + "epoch": 0.29059078004596456, + "grad_norm": 0.8401638269424438, + "learning_rate": 2.509208795617452e-05, + "loss": 0.12831497192382812, + "step": 4299 + }, + { + "epoch": 0.29065837501689873, + "grad_norm": 0.5006293058395386, + "learning_rate": 2.5089646656667937e-05, + "loss": 0.0897369384765625, + "step": 4300 + }, + { + "epoch": 0.2907259699878329, + "grad_norm": 2.0020904541015625, + "learning_rate": 2.508720486896151e-05, + "loss": 0.19110107421875, + "step": 4301 + }, + { + "epoch": 0.29079356495876707, + "grad_norm": 0.32289204001426697, + "learning_rate": 2.5084762593173375e-05, + "loss": 0.0650482177734375, + "step": 4302 + }, + { + "epoch": 0.29086115992970124, + "grad_norm": 0.9084901213645935, + "learning_rate": 2.508231982942171e-05, + "loss": 0.18862152099609375, + "step": 4303 + }, + { + "epoch": 0.2909287549006354, + "grad_norm": 0.3740685284137726, + "learning_rate": 2.507987657782471e-05, + "loss": 0.076995849609375, + "step": 4304 + }, + { + "epoch": 0.2909963498715696, + "grad_norm": 0.5742753148078918, + "learning_rate": 2.5077432838500594e-05, + "loss": 0.142303466796875, + "step": 4305 + }, + { + "epoch": 0.2910639448425037, + "grad_norm": 0.6015909314155579, + "learning_rate": 2.5074988611567614e-05, + "loss": 0.11260223388671875, + "step": 4306 + }, + { + "epoch": 0.29113153981343787, + "grad_norm": 0.4375464618206024, + "learning_rate": 2.507254389714402e-05, + "loss": 0.05808258056640625, + "step": 4307 + }, + { + "epoch": 0.29119913478437204, + "grad_norm": 0.4930391311645508, + "learning_rate": 2.5070098695348117e-05, + "loss": 0.0817108154296875, + "step": 4308 + }, + { + "epoch": 0.2912667297553062, + "grad_norm": 0.2174253761768341, + "learning_rate": 2.5067653006298214e-05, + "loss": 0.031963348388671875, + "step": 4309 + }, + { + "epoch": 0.2913343247262404, + "grad_norm": 0.344633549451828, + "learning_rate": 2.5065206830112646e-05, + "loss": 0.05304718017578125, + "step": 4310 + }, + { + "epoch": 0.29140191969717455, + "grad_norm": 1.5950286388397217, + "learning_rate": 2.5062760166909777e-05, + "loss": 0.14872360229492188, + "step": 4311 + }, + { + "epoch": 0.2914695146681087, + "grad_norm": 0.8085669279098511, + "learning_rate": 2.5060313016807993e-05, + "loss": 0.16009521484375, + "step": 4312 + }, + { + "epoch": 0.29153710963904284, + "grad_norm": 0.3130180239677429, + "learning_rate": 2.50578653799257e-05, + "loss": 0.06198883056640625, + "step": 4313 + }, + { + "epoch": 0.291604704609977, + "grad_norm": 0.7841162085533142, + "learning_rate": 2.505541725638133e-05, + "loss": 0.1616058349609375, + "step": 4314 + }, + { + "epoch": 0.2916722995809112, + "grad_norm": 0.6915345191955566, + "learning_rate": 2.505296864629334e-05, + "loss": 0.13375091552734375, + "step": 4315 + }, + { + "epoch": 0.29173989455184535, + "grad_norm": 0.1850990504026413, + "learning_rate": 2.5050519549780204e-05, + "loss": 0.0226287841796875, + "step": 4316 + }, + { + "epoch": 0.2918074895227795, + "grad_norm": 0.9235394597053528, + "learning_rate": 2.5048069966960432e-05, + "loss": 0.128662109375, + "step": 4317 + }, + { + "epoch": 0.2918750844937137, + "grad_norm": 0.5936786532402039, + "learning_rate": 2.504561989795255e-05, + "loss": 0.1067657470703125, + "step": 4318 + }, + { + "epoch": 0.2919426794646478, + "grad_norm": 2.0868635177612305, + "learning_rate": 2.5043169342875105e-05, + "loss": 0.2711181640625, + "step": 4319 + }, + { + "epoch": 0.292010274435582, + "grad_norm": 0.909162163734436, + "learning_rate": 2.5040718301846664e-05, + "loss": 0.14505767822265625, + "step": 4320 + }, + { + "epoch": 0.29207786940651614, + "grad_norm": 0.8992252349853516, + "learning_rate": 2.503826677498583e-05, + "loss": 0.1297607421875, + "step": 4321 + }, + { + "epoch": 0.2921454643774503, + "grad_norm": 1.2048307657241821, + "learning_rate": 2.5035814762411222e-05, + "loss": 0.198638916015625, + "step": 4322 + }, + { + "epoch": 0.2922130593483845, + "grad_norm": 0.8525638580322266, + "learning_rate": 2.5033362264241485e-05, + "loss": 0.14107513427734375, + "step": 4323 + }, + { + "epoch": 0.29228065431931866, + "grad_norm": 0.3537648916244507, + "learning_rate": 2.5030909280595286e-05, + "loss": 0.03946685791015625, + "step": 4324 + }, + { + "epoch": 0.2923482492902528, + "grad_norm": 1.6277351379394531, + "learning_rate": 2.5028455811591313e-05, + "loss": 0.1856536865234375, + "step": 4325 + }, + { + "epoch": 0.29241584426118694, + "grad_norm": 1.4828866720199585, + "learning_rate": 2.502600185734828e-05, + "loss": 0.179595947265625, + "step": 4326 + }, + { + "epoch": 0.2924834392321211, + "grad_norm": 0.6109051704406738, + "learning_rate": 2.502354741798493e-05, + "loss": 0.144287109375, + "step": 4327 + }, + { + "epoch": 0.2925510342030553, + "grad_norm": 0.24267026782035828, + "learning_rate": 2.5021092493620015e-05, + "loss": 0.02460479736328125, + "step": 4328 + }, + { + "epoch": 0.29261862917398945, + "grad_norm": 0.6747812628746033, + "learning_rate": 2.501863708437233e-05, + "loss": 0.1648712158203125, + "step": 4329 + }, + { + "epoch": 0.2926862241449236, + "grad_norm": 1.1713019609451294, + "learning_rate": 2.5016181190360674e-05, + "loss": 0.206329345703125, + "step": 4330 + }, + { + "epoch": 0.2927538191158578, + "grad_norm": 0.18509025871753693, + "learning_rate": 2.5013724811703887e-05, + "loss": 0.032474517822265625, + "step": 4331 + }, + { + "epoch": 0.29282141408679196, + "grad_norm": 1.2702243328094482, + "learning_rate": 2.501126794852081e-05, + "loss": 0.16035842895507812, + "step": 4332 + }, + { + "epoch": 0.2928890090577261, + "grad_norm": 0.3044550120830536, + "learning_rate": 2.5008810600930337e-05, + "loss": 0.04126739501953125, + "step": 4333 + }, + { + "epoch": 0.29295660402866025, + "grad_norm": 0.42204222083091736, + "learning_rate": 2.500635276905136e-05, + "loss": 0.084716796875, + "step": 4334 + }, + { + "epoch": 0.2930241989995944, + "grad_norm": 1.8167082071304321, + "learning_rate": 2.500389445300281e-05, + "loss": 0.234893798828125, + "step": 4335 + }, + { + "epoch": 0.2930917939705286, + "grad_norm": 0.9455018639564514, + "learning_rate": 2.500143565290363e-05, + "loss": 0.13907623291015625, + "step": 4336 + }, + { + "epoch": 0.29315938894146276, + "grad_norm": 0.8188844919204712, + "learning_rate": 2.4998976368872796e-05, + "loss": 0.178466796875, + "step": 4337 + }, + { + "epoch": 0.29322698391239693, + "grad_norm": 2.18027400970459, + "learning_rate": 2.4996516601029308e-05, + "loss": 0.2882080078125, + "step": 4338 + }, + { + "epoch": 0.2932945788833311, + "grad_norm": 0.9115905165672302, + "learning_rate": 2.4994056349492172e-05, + "loss": 0.1302642822265625, + "step": 4339 + }, + { + "epoch": 0.2933621738542652, + "grad_norm": 0.5135180354118347, + "learning_rate": 2.4991595614380436e-05, + "loss": 0.0778350830078125, + "step": 4340 + }, + { + "epoch": 0.2934297688251994, + "grad_norm": 0.8630072474479675, + "learning_rate": 2.498913439581317e-05, + "loss": 0.1454925537109375, + "step": 4341 + }, + { + "epoch": 0.29349736379613356, + "grad_norm": 0.9681957364082336, + "learning_rate": 2.498667269390946e-05, + "loss": 0.199859619140625, + "step": 4342 + }, + { + "epoch": 0.29356495876706773, + "grad_norm": 1.6218708753585815, + "learning_rate": 2.498421050878842e-05, + "loss": 0.2261962890625, + "step": 4343 + }, + { + "epoch": 0.2936325537380019, + "grad_norm": 1.2478057146072388, + "learning_rate": 2.4981747840569185e-05, + "loss": 0.144561767578125, + "step": 4344 + }, + { + "epoch": 0.29370014870893607, + "grad_norm": 0.5384851098060608, + "learning_rate": 2.4979284689370913e-05, + "loss": 0.0765228271484375, + "step": 4345 + }, + { + "epoch": 0.29376774367987024, + "grad_norm": 1.1846836805343628, + "learning_rate": 2.4976821055312785e-05, + "loss": 0.13697433471679688, + "step": 4346 + }, + { + "epoch": 0.29383533865080436, + "grad_norm": 1.23743736743927, + "learning_rate": 2.497435693851401e-05, + "loss": 0.1236114501953125, + "step": 4347 + }, + { + "epoch": 0.2939029336217385, + "grad_norm": 0.1814165860414505, + "learning_rate": 2.4971892339093817e-05, + "loss": 0.01956939697265625, + "step": 4348 + }, + { + "epoch": 0.2939705285926727, + "grad_norm": 0.7045738101005554, + "learning_rate": 2.4969427257171457e-05, + "loss": 0.136322021484375, + "step": 4349 + }, + { + "epoch": 0.29403812356360687, + "grad_norm": 0.9835715889930725, + "learning_rate": 2.4966961692866212e-05, + "loss": 0.10861587524414062, + "step": 4350 + }, + { + "epoch": 0.29410571853454104, + "grad_norm": 1.119576096534729, + "learning_rate": 2.4964495646297368e-05, + "loss": 0.17108154296875, + "step": 4351 + }, + { + "epoch": 0.2941733135054752, + "grad_norm": 1.0151622295379639, + "learning_rate": 2.4962029117584262e-05, + "loss": 0.12943267822265625, + "step": 4352 + }, + { + "epoch": 0.2942409084764094, + "grad_norm": 0.41027331352233887, + "learning_rate": 2.4959562106846237e-05, + "loss": 0.0901336669921875, + "step": 4353 + }, + { + "epoch": 0.2943085034473435, + "grad_norm": 0.3462083637714386, + "learning_rate": 2.4957094614202657e-05, + "loss": 0.074981689453125, + "step": 4354 + }, + { + "epoch": 0.29437609841827767, + "grad_norm": 0.6682287454605103, + "learning_rate": 2.495462663977291e-05, + "loss": 0.10901641845703125, + "step": 4355 + }, + { + "epoch": 0.29444369338921184, + "grad_norm": 0.3966875672340393, + "learning_rate": 2.4952158183676428e-05, + "loss": 0.046955108642578125, + "step": 4356 + }, + { + "epoch": 0.294511288360146, + "grad_norm": 0.4609107971191406, + "learning_rate": 2.4949689246032635e-05, + "loss": 0.102020263671875, + "step": 4357 + }, + { + "epoch": 0.2945788833310802, + "grad_norm": 0.9019327163696289, + "learning_rate": 2.4947219826961003e-05, + "loss": 0.106719970703125, + "step": 4358 + }, + { + "epoch": 0.29464647830201435, + "grad_norm": 0.39482617378234863, + "learning_rate": 2.4944749926581012e-05, + "loss": 0.0436859130859375, + "step": 4359 + }, + { + "epoch": 0.2947140732729485, + "grad_norm": 1.1589030027389526, + "learning_rate": 2.4942279545012175e-05, + "loss": 0.174835205078125, + "step": 4360 + }, + { + "epoch": 0.29478166824388263, + "grad_norm": 0.3131003677845001, + "learning_rate": 2.4939808682374028e-05, + "loss": 0.033939361572265625, + "step": 4361 + }, + { + "epoch": 0.2948492632148168, + "grad_norm": 1.4144095182418823, + "learning_rate": 2.493733733878612e-05, + "loss": 0.19866180419921875, + "step": 4362 + }, + { + "epoch": 0.294916858185751, + "grad_norm": 1.1212809085845947, + "learning_rate": 2.493486551436803e-05, + "loss": 0.187286376953125, + "step": 4363 + }, + { + "epoch": 0.29498445315668514, + "grad_norm": 0.6037439703941345, + "learning_rate": 2.493239320923936e-05, + "loss": 0.13153839111328125, + "step": 4364 + }, + { + "epoch": 0.2950520481276193, + "grad_norm": 0.8516002297401428, + "learning_rate": 2.492992042351974e-05, + "loss": 0.1554412841796875, + "step": 4365 + }, + { + "epoch": 0.2951196430985535, + "grad_norm": 1.088636875152588, + "learning_rate": 2.4927447157328818e-05, + "loss": 0.1478118896484375, + "step": 4366 + }, + { + "epoch": 0.29518723806948766, + "grad_norm": 0.6930854320526123, + "learning_rate": 2.4924973410786262e-05, + "loss": 0.15378570556640625, + "step": 4367 + }, + { + "epoch": 0.29525483304042177, + "grad_norm": 0.5155085921287537, + "learning_rate": 2.4922499184011766e-05, + "loss": 0.06011962890625, + "step": 4368 + }, + { + "epoch": 0.29532242801135594, + "grad_norm": 0.8090991377830505, + "learning_rate": 2.4920024477125058e-05, + "loss": 0.10755157470703125, + "step": 4369 + }, + { + "epoch": 0.2953900229822901, + "grad_norm": 0.26968085765838623, + "learning_rate": 2.491754929024587e-05, + "loss": 0.04319286346435547, + "step": 4370 + }, + { + "epoch": 0.2954576179532243, + "grad_norm": 1.2911697626113892, + "learning_rate": 2.4915073623493977e-05, + "loss": 0.1417236328125, + "step": 4371 + }, + { + "epoch": 0.29552521292415845, + "grad_norm": 2.4058175086975098, + "learning_rate": 2.4912597476989153e-05, + "loss": 0.26361083984375, + "step": 4372 + }, + { + "epoch": 0.2955928078950926, + "grad_norm": 0.9812642335891724, + "learning_rate": 2.491012085085122e-05, + "loss": 0.173248291015625, + "step": 4373 + }, + { + "epoch": 0.2956604028660268, + "grad_norm": 0.424529105424881, + "learning_rate": 2.4907643745200013e-05, + "loss": 0.09760284423828125, + "step": 4374 + }, + { + "epoch": 0.2957279978369609, + "grad_norm": 0.24703449010849, + "learning_rate": 2.4905166160155384e-05, + "loss": 0.04882049560546875, + "step": 4375 + }, + { + "epoch": 0.2957955928078951, + "grad_norm": 1.8685003519058228, + "learning_rate": 2.490268809583722e-05, + "loss": 0.251220703125, + "step": 4376 + }, + { + "epoch": 0.29586318777882925, + "grad_norm": 0.7698741555213928, + "learning_rate": 2.4900209552365415e-05, + "loss": 0.1722259521484375, + "step": 4377 + }, + { + "epoch": 0.2959307827497634, + "grad_norm": 0.39949584007263184, + "learning_rate": 2.4897730529859908e-05, + "loss": 0.07891464233398438, + "step": 4378 + }, + { + "epoch": 0.2959983777206976, + "grad_norm": 0.8863953948020935, + "learning_rate": 2.4895251028440644e-05, + "loss": 0.213653564453125, + "step": 4379 + }, + { + "epoch": 0.29606597269163176, + "grad_norm": 0.27052944898605347, + "learning_rate": 2.4892771048227597e-05, + "loss": 0.040271759033203125, + "step": 4380 + }, + { + "epoch": 0.2961335676625659, + "grad_norm": 0.7226665019989014, + "learning_rate": 2.4890290589340768e-05, + "loss": 0.111083984375, + "step": 4381 + }, + { + "epoch": 0.29620116263350005, + "grad_norm": 0.6378945708274841, + "learning_rate": 2.488780965190017e-05, + "loss": 0.10702133178710938, + "step": 4382 + }, + { + "epoch": 0.2962687576044342, + "grad_norm": 1.1224745512008667, + "learning_rate": 2.4885328236025843e-05, + "loss": 0.2203369140625, + "step": 4383 + }, + { + "epoch": 0.2963363525753684, + "grad_norm": 0.8364994525909424, + "learning_rate": 2.488284634183787e-05, + "loss": 0.06407928466796875, + "step": 4384 + }, + { + "epoch": 0.29640394754630256, + "grad_norm": 0.24809837341308594, + "learning_rate": 2.4880363969456325e-05, + "loss": 0.042659759521484375, + "step": 4385 + }, + { + "epoch": 0.29647154251723673, + "grad_norm": 0.2987930476665497, + "learning_rate": 2.4877881119001328e-05, + "loss": 0.0573272705078125, + "step": 4386 + }, + { + "epoch": 0.2965391374881709, + "grad_norm": 1.4090075492858887, + "learning_rate": 2.487539779059301e-05, + "loss": 0.152191162109375, + "step": 4387 + }, + { + "epoch": 0.296606732459105, + "grad_norm": 0.45027655363082886, + "learning_rate": 2.4872913984351536e-05, + "loss": 0.1021881103515625, + "step": 4388 + }, + { + "epoch": 0.2966743274300392, + "grad_norm": 0.49117425084114075, + "learning_rate": 2.487042970039708e-05, + "loss": 0.106109619140625, + "step": 4389 + }, + { + "epoch": 0.29674192240097336, + "grad_norm": 0.3584362864494324, + "learning_rate": 2.4867944938849855e-05, + "loss": 0.0707244873046875, + "step": 4390 + }, + { + "epoch": 0.2968095173719075, + "grad_norm": 1.1836107969284058, + "learning_rate": 2.4865459699830082e-05, + "loss": 0.2010650634765625, + "step": 4391 + }, + { + "epoch": 0.2968771123428417, + "grad_norm": 0.21924364566802979, + "learning_rate": 2.486297398345802e-05, + "loss": 0.022878646850585938, + "step": 4392 + }, + { + "epoch": 0.29694470731377587, + "grad_norm": 1.494795560836792, + "learning_rate": 2.4860487789853935e-05, + "loss": 0.23199462890625, + "step": 4393 + }, + { + "epoch": 0.29701230228471004, + "grad_norm": 0.7670368552207947, + "learning_rate": 2.4858001119138136e-05, + "loss": 0.0923919677734375, + "step": 4394 + }, + { + "epoch": 0.29707989725564415, + "grad_norm": 0.2810404300689697, + "learning_rate": 2.4855513971430928e-05, + "loss": 0.05710601806640625, + "step": 4395 + }, + { + "epoch": 0.2971474922265783, + "grad_norm": 0.4297637641429901, + "learning_rate": 2.4853026346852672e-05, + "loss": 0.052585601806640625, + "step": 4396 + }, + { + "epoch": 0.2972150871975125, + "grad_norm": 0.6266050338745117, + "learning_rate": 2.485053824552372e-05, + "loss": 0.1290740966796875, + "step": 4397 + }, + { + "epoch": 0.29728268216844667, + "grad_norm": 1.2067087888717651, + "learning_rate": 2.484804966756447e-05, + "loss": 0.23834228515625, + "step": 4398 + }, + { + "epoch": 0.29735027713938084, + "grad_norm": 0.4054465591907501, + "learning_rate": 2.4845560613095334e-05, + "loss": 0.09851837158203125, + "step": 4399 + }, + { + "epoch": 0.297417872110315, + "grad_norm": 0.3794708847999573, + "learning_rate": 2.4843071082236745e-05, + "loss": 0.05086517333984375, + "step": 4400 + }, + { + "epoch": 0.2974854670812492, + "grad_norm": 0.9051774144172668, + "learning_rate": 2.4840581075109165e-05, + "loss": 0.1322479248046875, + "step": 4401 + }, + { + "epoch": 0.2975530620521833, + "grad_norm": 0.5098279714584351, + "learning_rate": 2.4838090591833072e-05, + "loss": 0.0825653076171875, + "step": 4402 + }, + { + "epoch": 0.29762065702311746, + "grad_norm": 1.1585001945495605, + "learning_rate": 2.4835599632528972e-05, + "loss": 0.186248779296875, + "step": 4403 + }, + { + "epoch": 0.29768825199405163, + "grad_norm": 0.4759312570095062, + "learning_rate": 2.4833108197317402e-05, + "loss": 0.11140060424804688, + "step": 4404 + }, + { + "epoch": 0.2977558469649858, + "grad_norm": 0.46618714928627014, + "learning_rate": 2.48306162863189e-05, + "loss": 0.1125946044921875, + "step": 4405 + }, + { + "epoch": 0.29782344193592, + "grad_norm": 0.6560651659965515, + "learning_rate": 2.482812389965405e-05, + "loss": 0.1467132568359375, + "step": 4406 + }, + { + "epoch": 0.29789103690685415, + "grad_norm": 0.3458667993545532, + "learning_rate": 2.4825631037443444e-05, + "loss": 0.06620025634765625, + "step": 4407 + }, + { + "epoch": 0.2979586318777883, + "grad_norm": 0.45367631316185, + "learning_rate": 2.4823137699807702e-05, + "loss": 0.06644439697265625, + "step": 4408 + }, + { + "epoch": 0.29802622684872243, + "grad_norm": 1.3426148891448975, + "learning_rate": 2.482064388686747e-05, + "loss": 0.14910125732421875, + "step": 4409 + }, + { + "epoch": 0.2980938218196566, + "grad_norm": 0.8015307784080505, + "learning_rate": 2.4818149598743412e-05, + "loss": 0.170684814453125, + "step": 4410 + }, + { + "epoch": 0.29816141679059077, + "grad_norm": 0.7223756313323975, + "learning_rate": 2.4815654835556218e-05, + "loss": 0.1376800537109375, + "step": 4411 + }, + { + "epoch": 0.29822901176152494, + "grad_norm": 1.4121063947677612, + "learning_rate": 2.48131595974266e-05, + "loss": 0.168212890625, + "step": 4412 + }, + { + "epoch": 0.2982966067324591, + "grad_norm": 0.8114190697669983, + "learning_rate": 2.4810663884475295e-05, + "loss": 0.14922332763671875, + "step": 4413 + }, + { + "epoch": 0.2983642017033933, + "grad_norm": 0.7187215685844421, + "learning_rate": 2.4808167696823062e-05, + "loss": 0.10135269165039062, + "step": 4414 + }, + { + "epoch": 0.29843179667432745, + "grad_norm": 0.5678741931915283, + "learning_rate": 2.4805671034590673e-05, + "loss": 0.1230316162109375, + "step": 4415 + }, + { + "epoch": 0.29849939164526157, + "grad_norm": 0.6097378730773926, + "learning_rate": 2.4803173897898942e-05, + "loss": 0.13649749755859375, + "step": 4416 + }, + { + "epoch": 0.29856698661619574, + "grad_norm": 0.38748085498809814, + "learning_rate": 2.4800676286868693e-05, + "loss": 0.07030487060546875, + "step": 4417 + }, + { + "epoch": 0.2986345815871299, + "grad_norm": 0.7223446369171143, + "learning_rate": 2.479817820162077e-05, + "loss": 0.11733627319335938, + "step": 4418 + }, + { + "epoch": 0.2987021765580641, + "grad_norm": 0.8085072040557861, + "learning_rate": 2.4795679642276057e-05, + "loss": 0.12919235229492188, + "step": 4419 + }, + { + "epoch": 0.29876977152899825, + "grad_norm": 1.187864065170288, + "learning_rate": 2.4793180608955442e-05, + "loss": 0.1813812255859375, + "step": 4420 + }, + { + "epoch": 0.2988373664999324, + "grad_norm": 0.29671016335487366, + "learning_rate": 2.4790681101779846e-05, + "loss": 0.05352020263671875, + "step": 4421 + }, + { + "epoch": 0.2989049614708666, + "grad_norm": 0.8075543642044067, + "learning_rate": 2.4788181120870215e-05, + "loss": 0.11848831176757812, + "step": 4422 + }, + { + "epoch": 0.2989725564418007, + "grad_norm": 0.8004869818687439, + "learning_rate": 2.478568066634751e-05, + "loss": 0.181182861328125, + "step": 4423 + }, + { + "epoch": 0.2990401514127349, + "grad_norm": 0.8573794364929199, + "learning_rate": 2.478317973833271e-05, + "loss": 0.15874481201171875, + "step": 4424 + }, + { + "epoch": 0.29910774638366905, + "grad_norm": 1.1518208980560303, + "learning_rate": 2.4780678336946835e-05, + "loss": 0.22943115234375, + "step": 4425 + }, + { + "epoch": 0.2991753413546032, + "grad_norm": 0.7319296598434448, + "learning_rate": 2.477817646231092e-05, + "loss": 0.1533355712890625, + "step": 4426 + }, + { + "epoch": 0.2992429363255374, + "grad_norm": 1.2279380559921265, + "learning_rate": 2.4775674114546017e-05, + "loss": 0.12731170654296875, + "step": 4427 + }, + { + "epoch": 0.29931053129647156, + "grad_norm": 0.32533591985702515, + "learning_rate": 2.4773171293773204e-05, + "loss": 0.0560150146484375, + "step": 4428 + }, + { + "epoch": 0.29937812626740573, + "grad_norm": 0.7136726379394531, + "learning_rate": 2.4770668000113586e-05, + "loss": 0.09733963012695312, + "step": 4429 + }, + { + "epoch": 0.29944572123833985, + "grad_norm": 0.6613386273384094, + "learning_rate": 2.476816423368829e-05, + "loss": 0.1338043212890625, + "step": 4430 + }, + { + "epoch": 0.299513316209274, + "grad_norm": 0.6915056109428406, + "learning_rate": 2.4765659994618457e-05, + "loss": 0.174530029296875, + "step": 4431 + }, + { + "epoch": 0.2995809111802082, + "grad_norm": 0.45978862047195435, + "learning_rate": 2.4763155283025262e-05, + "loss": 0.10924911499023438, + "step": 4432 + }, + { + "epoch": 0.29964850615114236, + "grad_norm": 0.47068050503730774, + "learning_rate": 2.47606500990299e-05, + "loss": 0.10831451416015625, + "step": 4433 + }, + { + "epoch": 0.29971610112207653, + "grad_norm": 1.2024542093276978, + "learning_rate": 2.4758144442753588e-05, + "loss": 0.1965484619140625, + "step": 4434 + }, + { + "epoch": 0.2997836960930107, + "grad_norm": 0.3515658676624298, + "learning_rate": 2.475563831431756e-05, + "loss": 0.071868896484375, + "step": 4435 + }, + { + "epoch": 0.29985129106394487, + "grad_norm": 0.3459491729736328, + "learning_rate": 2.4753131713843078e-05, + "loss": 0.06227874755859375, + "step": 4436 + }, + { + "epoch": 0.299918886034879, + "grad_norm": 1.6255524158477783, + "learning_rate": 2.4750624641451433e-05, + "loss": 0.30255126953125, + "step": 4437 + }, + { + "epoch": 0.29998648100581315, + "grad_norm": 0.5548089742660522, + "learning_rate": 2.4748117097263927e-05, + "loss": 0.07329559326171875, + "step": 4438 + }, + { + "epoch": 0.3000540759767473, + "grad_norm": 1.0178712606430054, + "learning_rate": 2.4745609081401893e-05, + "loss": 0.1407012939453125, + "step": 4439 + }, + { + "epoch": 0.3001216709476815, + "grad_norm": 0.6565298438072205, + "learning_rate": 2.4743100593986686e-05, + "loss": 0.11943817138671875, + "step": 4440 + }, + { + "epoch": 0.30018926591861567, + "grad_norm": 0.4082392156124115, + "learning_rate": 2.474059163513968e-05, + "loss": 0.1073455810546875, + "step": 4441 + }, + { + "epoch": 0.30025686088954984, + "grad_norm": 0.5882676839828491, + "learning_rate": 2.4738082204982275e-05, + "loss": 0.12349700927734375, + "step": 4442 + }, + { + "epoch": 0.300324455860484, + "grad_norm": 0.5361059308052063, + "learning_rate": 2.4735572303635888e-05, + "loss": 0.06763458251953125, + "step": 4443 + }, + { + "epoch": 0.3003920508314181, + "grad_norm": 0.31201156973838806, + "learning_rate": 2.4733061931221973e-05, + "loss": 0.050868988037109375, + "step": 4444 + }, + { + "epoch": 0.3004596458023523, + "grad_norm": 0.9066802263259888, + "learning_rate": 2.473055108786199e-05, + "loss": 0.1905517578125, + "step": 4445 + }, + { + "epoch": 0.30052724077328646, + "grad_norm": 0.5957356691360474, + "learning_rate": 2.4728039773677435e-05, + "loss": 0.11530303955078125, + "step": 4446 + }, + { + "epoch": 0.30059483574422063, + "grad_norm": 0.35947343707084656, + "learning_rate": 2.472552798878982e-05, + "loss": 0.0469670295715332, + "step": 4447 + }, + { + "epoch": 0.3006624307151548, + "grad_norm": 1.481297254562378, + "learning_rate": 2.4723015733320673e-05, + "loss": 0.154937744140625, + "step": 4448 + }, + { + "epoch": 0.300730025686089, + "grad_norm": 0.33846840262413025, + "learning_rate": 2.4720503007391555e-05, + "loss": 0.07241058349609375, + "step": 4449 + }, + { + "epoch": 0.3007976206570231, + "grad_norm": 0.8364344835281372, + "learning_rate": 2.471798981112406e-05, + "loss": 0.096160888671875, + "step": 4450 + }, + { + "epoch": 0.30086521562795726, + "grad_norm": 0.6203365921974182, + "learning_rate": 2.471547614463977e-05, + "loss": 0.1353302001953125, + "step": 4451 + }, + { + "epoch": 0.30093281059889143, + "grad_norm": 0.7108100056648254, + "learning_rate": 2.4712962008060336e-05, + "loss": 0.0904693603515625, + "step": 4452 + }, + { + "epoch": 0.3010004055698256, + "grad_norm": 0.39527109265327454, + "learning_rate": 2.471044740150739e-05, + "loss": 0.0932464599609375, + "step": 4453 + }, + { + "epoch": 0.3010680005407598, + "grad_norm": 0.6977356672286987, + "learning_rate": 2.470793232510261e-05, + "loss": 0.12835693359375, + "step": 4454 + }, + { + "epoch": 0.30113559551169394, + "grad_norm": 0.48125240206718445, + "learning_rate": 2.4705416778967697e-05, + "loss": 0.10382843017578125, + "step": 4455 + }, + { + "epoch": 0.3012031904826281, + "grad_norm": 0.5528872609138489, + "learning_rate": 2.4702900763224353e-05, + "loss": 0.1026763916015625, + "step": 4456 + }, + { + "epoch": 0.30127078545356223, + "grad_norm": 0.3575860857963562, + "learning_rate": 2.4700384277994337e-05, + "loss": 0.07513427734375, + "step": 4457 + }, + { + "epoch": 0.3013383804244964, + "grad_norm": 0.8725442886352539, + "learning_rate": 2.4697867323399398e-05, + "loss": 0.181243896484375, + "step": 4458 + }, + { + "epoch": 0.30140597539543057, + "grad_norm": 0.37323614954948425, + "learning_rate": 2.469534989956133e-05, + "loss": 0.09452056884765625, + "step": 4459 + }, + { + "epoch": 0.30147357036636474, + "grad_norm": 1.3418151140213013, + "learning_rate": 2.469283200660194e-05, + "loss": 0.177032470703125, + "step": 4460 + }, + { + "epoch": 0.3015411653372989, + "grad_norm": 0.5016721487045288, + "learning_rate": 2.4690313644643056e-05, + "loss": 0.086639404296875, + "step": 4461 + }, + { + "epoch": 0.3016087603082331, + "grad_norm": 0.7893211841583252, + "learning_rate": 2.4687794813806536e-05, + "loss": 0.0899658203125, + "step": 4462 + }, + { + "epoch": 0.30167635527916725, + "grad_norm": 1.4343576431274414, + "learning_rate": 2.4685275514214255e-05, + "loss": 0.2005462646484375, + "step": 4463 + }, + { + "epoch": 0.30174395025010137, + "grad_norm": 0.33018404245376587, + "learning_rate": 2.468275574598811e-05, + "loss": 0.068359375, + "step": 4464 + }, + { + "epoch": 0.30181154522103554, + "grad_norm": 1.34268319606781, + "learning_rate": 2.468023550925003e-05, + "loss": 0.166748046875, + "step": 4465 + }, + { + "epoch": 0.3018791401919697, + "grad_norm": 1.1031147241592407, + "learning_rate": 2.4677714804121954e-05, + "loss": 0.1515350341796875, + "step": 4466 + }, + { + "epoch": 0.3019467351629039, + "grad_norm": 0.6590585708618164, + "learning_rate": 2.4675193630725847e-05, + "loss": 0.14482879638671875, + "step": 4467 + }, + { + "epoch": 0.30201433013383805, + "grad_norm": 0.5135082602500916, + "learning_rate": 2.467267198918371e-05, + "loss": 0.08080291748046875, + "step": 4468 + }, + { + "epoch": 0.3020819251047722, + "grad_norm": 0.6432160139083862, + "learning_rate": 2.4670149879617543e-05, + "loss": 0.1182708740234375, + "step": 4469 + }, + { + "epoch": 0.3021495200757064, + "grad_norm": 0.495730996131897, + "learning_rate": 2.4667627302149386e-05, + "loss": 0.0782012939453125, + "step": 4470 + }, + { + "epoch": 0.3022171150466405, + "grad_norm": 0.23110517859458923, + "learning_rate": 2.46651042569013e-05, + "loss": 0.045867919921875, + "step": 4471 + }, + { + "epoch": 0.3022847100175747, + "grad_norm": 1.1063764095306396, + "learning_rate": 2.4662580743995365e-05, + "loss": 0.200164794921875, + "step": 4472 + }, + { + "epoch": 0.30235230498850885, + "grad_norm": 1.7390459775924683, + "learning_rate": 2.466005676355368e-05, + "loss": 0.17974090576171875, + "step": 4473 + }, + { + "epoch": 0.302419899959443, + "grad_norm": 0.6429286003112793, + "learning_rate": 2.4657532315698378e-05, + "loss": 0.13824462890625, + "step": 4474 + }, + { + "epoch": 0.3024874949303772, + "grad_norm": 0.7823267579078674, + "learning_rate": 2.4655007400551597e-05, + "loss": 0.1451568603515625, + "step": 4475 + }, + { + "epoch": 0.30255508990131136, + "grad_norm": 0.6122609376907349, + "learning_rate": 2.4652482018235517e-05, + "loss": 0.12889862060546875, + "step": 4476 + }, + { + "epoch": 0.30262268487224553, + "grad_norm": 0.8227077126502991, + "learning_rate": 2.464995616887233e-05, + "loss": 0.1405029296875, + "step": 4477 + }, + { + "epoch": 0.30269027984317964, + "grad_norm": 1.0497558116912842, + "learning_rate": 2.464742985258425e-05, + "loss": 0.13264846801757812, + "step": 4478 + }, + { + "epoch": 0.3027578748141138, + "grad_norm": 0.5953544974327087, + "learning_rate": 2.4644903069493514e-05, + "loss": 0.10262298583984375, + "step": 4479 + }, + { + "epoch": 0.302825469785048, + "grad_norm": 0.5902438759803772, + "learning_rate": 2.4642375819722387e-05, + "loss": 0.11517333984375, + "step": 4480 + }, + { + "epoch": 0.30289306475598216, + "grad_norm": 0.13434851169586182, + "learning_rate": 2.463984810339316e-05, + "loss": 0.033599853515625, + "step": 4481 + }, + { + "epoch": 0.3029606597269163, + "grad_norm": 0.4539400339126587, + "learning_rate": 2.4637319920628127e-05, + "loss": 0.0664825439453125, + "step": 4482 + }, + { + "epoch": 0.3030282546978505, + "grad_norm": 1.2130296230316162, + "learning_rate": 2.463479127154962e-05, + "loss": 0.22003173828125, + "step": 4483 + }, + { + "epoch": 0.30309584966878467, + "grad_norm": 0.7981587648391724, + "learning_rate": 2.4632262156279995e-05, + "loss": 0.12242889404296875, + "step": 4484 + }, + { + "epoch": 0.3031634446397188, + "grad_norm": 0.4336645305156708, + "learning_rate": 2.4629732574941624e-05, + "loss": 0.0670928955078125, + "step": 4485 + }, + { + "epoch": 0.30323103961065295, + "grad_norm": 0.6056800484657288, + "learning_rate": 2.4627202527656904e-05, + "loss": 0.12540054321289062, + "step": 4486 + }, + { + "epoch": 0.3032986345815871, + "grad_norm": 0.8937817215919495, + "learning_rate": 2.4624672014548257e-05, + "loss": 0.209930419921875, + "step": 4487 + }, + { + "epoch": 0.3033662295525213, + "grad_norm": 0.7222045063972473, + "learning_rate": 2.462214103573812e-05, + "loss": 0.12880706787109375, + "step": 4488 + }, + { + "epoch": 0.30343382452345546, + "grad_norm": 0.32552266120910645, + "learning_rate": 2.4619609591348963e-05, + "loss": 0.05515289306640625, + "step": 4489 + }, + { + "epoch": 0.30350141949438963, + "grad_norm": 0.45556434988975525, + "learning_rate": 2.461707768150327e-05, + "loss": 0.094207763671875, + "step": 4490 + }, + { + "epoch": 0.3035690144653238, + "grad_norm": 0.5306344032287598, + "learning_rate": 2.461454530632355e-05, + "loss": 0.08157730102539062, + "step": 4491 + }, + { + "epoch": 0.3036366094362579, + "grad_norm": 0.6693031787872314, + "learning_rate": 2.4612012465932333e-05, + "loss": 0.10420989990234375, + "step": 4492 + }, + { + "epoch": 0.3037042044071921, + "grad_norm": 0.5044525861740112, + "learning_rate": 2.4609479160452185e-05, + "loss": 0.1399078369140625, + "step": 4493 + }, + { + "epoch": 0.30377179937812626, + "grad_norm": 0.18857775628566742, + "learning_rate": 2.4606945390005668e-05, + "loss": 0.040363311767578125, + "step": 4494 + }, + { + "epoch": 0.30383939434906043, + "grad_norm": 0.33798494935035706, + "learning_rate": 2.460441115471539e-05, + "loss": 0.048542022705078125, + "step": 4495 + }, + { + "epoch": 0.3039069893199946, + "grad_norm": 1.156778335571289, + "learning_rate": 2.460187645470397e-05, + "loss": 0.231781005859375, + "step": 4496 + }, + { + "epoch": 0.3039745842909288, + "grad_norm": 1.0585920810699463, + "learning_rate": 2.459934129009405e-05, + "loss": 0.133819580078125, + "step": 4497 + }, + { + "epoch": 0.30404217926186294, + "grad_norm": 0.7567737102508545, + "learning_rate": 2.4596805661008305e-05, + "loss": 0.12737274169921875, + "step": 4498 + }, + { + "epoch": 0.30410977423279706, + "grad_norm": 1.1107473373413086, + "learning_rate": 2.4594269567569423e-05, + "loss": 0.16043853759765625, + "step": 4499 + }, + { + "epoch": 0.30417736920373123, + "grad_norm": 0.23984317481517792, + "learning_rate": 2.4591733009900108e-05, + "loss": 0.05126953125, + "step": 4500 + }, + { + "epoch": 0.3042449641746654, + "grad_norm": 0.6362202167510986, + "learning_rate": 2.45891959881231e-05, + "loss": 0.17388916015625, + "step": 4501 + }, + { + "epoch": 0.30431255914559957, + "grad_norm": 0.48572489619255066, + "learning_rate": 2.4586658502361158e-05, + "loss": 0.079254150390625, + "step": 4502 + }, + { + "epoch": 0.30438015411653374, + "grad_norm": 0.4110420048236847, + "learning_rate": 2.4584120552737057e-05, + "loss": 0.09563446044921875, + "step": 4503 + }, + { + "epoch": 0.3044477490874679, + "grad_norm": 1.2302836179733276, + "learning_rate": 2.45815821393736e-05, + "loss": 0.18646240234375, + "step": 4504 + }, + { + "epoch": 0.3045153440584021, + "grad_norm": 1.0353277921676636, + "learning_rate": 2.457904326239362e-05, + "loss": 0.223236083984375, + "step": 4505 + }, + { + "epoch": 0.3045829390293362, + "grad_norm": 0.8385098576545715, + "learning_rate": 2.4576503921919942e-05, + "loss": 0.137786865234375, + "step": 4506 + }, + { + "epoch": 0.30465053400027037, + "grad_norm": 1.2457609176635742, + "learning_rate": 2.457396411807546e-05, + "loss": 0.150848388671875, + "step": 4507 + }, + { + "epoch": 0.30471812897120454, + "grad_norm": 0.7649333477020264, + "learning_rate": 2.4571423850983048e-05, + "loss": 0.09170913696289062, + "step": 4508 + }, + { + "epoch": 0.3047857239421387, + "grad_norm": 0.3953264653682709, + "learning_rate": 2.456888312076563e-05, + "loss": 0.0667266845703125, + "step": 4509 + }, + { + "epoch": 0.3048533189130729, + "grad_norm": 0.868013858795166, + "learning_rate": 2.4566341927546134e-05, + "loss": 0.1340618133544922, + "step": 4510 + }, + { + "epoch": 0.30492091388400705, + "grad_norm": 0.20563270151615143, + "learning_rate": 2.4563800271447524e-05, + "loss": 0.04869842529296875, + "step": 4511 + }, + { + "epoch": 0.30498850885494116, + "grad_norm": 0.5605899691581726, + "learning_rate": 2.4561258152592782e-05, + "loss": 0.113983154296875, + "step": 4512 + }, + { + "epoch": 0.30505610382587534, + "grad_norm": 0.5167469382286072, + "learning_rate": 2.455871557110491e-05, + "loss": 0.12483978271484375, + "step": 4513 + }, + { + "epoch": 0.3051236987968095, + "grad_norm": 0.378571480512619, + "learning_rate": 2.455617252710693e-05, + "loss": 0.089599609375, + "step": 4514 + }, + { + "epoch": 0.3051912937677437, + "grad_norm": 0.8181061148643494, + "learning_rate": 2.4553629020721896e-05, + "loss": 0.199462890625, + "step": 4515 + }, + { + "epoch": 0.30525888873867785, + "grad_norm": 0.8005092740058899, + "learning_rate": 2.455108505207288e-05, + "loss": 0.195037841796875, + "step": 4516 + }, + { + "epoch": 0.305326483709612, + "grad_norm": 0.38185569643974304, + "learning_rate": 2.454854062128297e-05, + "loss": 0.0661468505859375, + "step": 4517 + }, + { + "epoch": 0.3053940786805462, + "grad_norm": 0.2511201798915863, + "learning_rate": 2.4545995728475282e-05, + "loss": 0.038360595703125, + "step": 4518 + }, + { + "epoch": 0.3054616736514803, + "grad_norm": 0.500568687915802, + "learning_rate": 2.4543450373772956e-05, + "loss": 0.0906829833984375, + "step": 4519 + }, + { + "epoch": 0.3055292686224145, + "grad_norm": 1.4889492988586426, + "learning_rate": 2.4540904557299146e-05, + "loss": 0.16844940185546875, + "step": 4520 + }, + { + "epoch": 0.30559686359334864, + "grad_norm": 0.7865971922874451, + "learning_rate": 2.4538358279177042e-05, + "loss": 0.1248321533203125, + "step": 4521 + }, + { + "epoch": 0.3056644585642828, + "grad_norm": 0.6680657863616943, + "learning_rate": 2.453581153952985e-05, + "loss": 0.168731689453125, + "step": 4522 + }, + { + "epoch": 0.305732053535217, + "grad_norm": 0.8191923499107361, + "learning_rate": 2.453326433848079e-05, + "loss": 0.16180419921875, + "step": 4523 + }, + { + "epoch": 0.30579964850615116, + "grad_norm": 0.5522253513336182, + "learning_rate": 2.4530716676153113e-05, + "loss": 0.09822463989257812, + "step": 4524 + }, + { + "epoch": 0.3058672434770853, + "grad_norm": 0.7101283669471741, + "learning_rate": 2.4528168552670095e-05, + "loss": 0.1102752685546875, + "step": 4525 + }, + { + "epoch": 0.30593483844801944, + "grad_norm": 1.0227806568145752, + "learning_rate": 2.4525619968155023e-05, + "loss": 0.13452911376953125, + "step": 4526 + }, + { + "epoch": 0.3060024334189536, + "grad_norm": 0.30967414379119873, + "learning_rate": 2.452307092273122e-05, + "loss": 0.05710601806640625, + "step": 4527 + }, + { + "epoch": 0.3060700283898878, + "grad_norm": 1.2909176349639893, + "learning_rate": 2.452052141652203e-05, + "loss": 0.1512298583984375, + "step": 4528 + }, + { + "epoch": 0.30613762336082195, + "grad_norm": 1.015123963356018, + "learning_rate": 2.45179714496508e-05, + "loss": 0.197998046875, + "step": 4529 + }, + { + "epoch": 0.3062052183317561, + "grad_norm": 0.3432694673538208, + "learning_rate": 2.4515421022240917e-05, + "loss": 0.04584503173828125, + "step": 4530 + }, + { + "epoch": 0.3062728133026903, + "grad_norm": 0.5161628127098083, + "learning_rate": 2.4512870134415792e-05, + "loss": 0.12757110595703125, + "step": 4531 + }, + { + "epoch": 0.30634040827362446, + "grad_norm": 0.25234806537628174, + "learning_rate": 2.451031878629885e-05, + "loss": 0.04689598083496094, + "step": 4532 + }, + { + "epoch": 0.3064080032445586, + "grad_norm": 0.7058485746383667, + "learning_rate": 2.4507766978013538e-05, + "loss": 0.09138870239257812, + "step": 4533 + }, + { + "epoch": 0.30647559821549275, + "grad_norm": 1.0889911651611328, + "learning_rate": 2.4505214709683338e-05, + "loss": 0.192291259765625, + "step": 4534 + }, + { + "epoch": 0.3065431931864269, + "grad_norm": 0.4097745418548584, + "learning_rate": 2.450266198143173e-05, + "loss": 0.0792083740234375, + "step": 4535 + }, + { + "epoch": 0.3066107881573611, + "grad_norm": 0.7517828941345215, + "learning_rate": 2.4500108793382244e-05, + "loss": 0.12892913818359375, + "step": 4536 + }, + { + "epoch": 0.30667838312829526, + "grad_norm": 0.44491106271743774, + "learning_rate": 2.4497555145658416e-05, + "loss": 0.07142257690429688, + "step": 4537 + }, + { + "epoch": 0.30674597809922943, + "grad_norm": 0.6095007061958313, + "learning_rate": 2.44950010383838e-05, + "loss": 0.09779071807861328, + "step": 4538 + }, + { + "epoch": 0.3068135730701636, + "grad_norm": 0.9692427515983582, + "learning_rate": 2.4492446471681988e-05, + "loss": 0.212860107421875, + "step": 4539 + }, + { + "epoch": 0.3068811680410977, + "grad_norm": 0.27750036120414734, + "learning_rate": 2.4489891445676583e-05, + "loss": 0.05393218994140625, + "step": 4540 + }, + { + "epoch": 0.3069487630120319, + "grad_norm": 0.7836571931838989, + "learning_rate": 2.4487335960491216e-05, + "loss": 0.125335693359375, + "step": 4541 + }, + { + "epoch": 0.30701635798296606, + "grad_norm": 0.6967135071754456, + "learning_rate": 2.4484780016249526e-05, + "loss": 0.188690185546875, + "step": 4542 + }, + { + "epoch": 0.30708395295390023, + "grad_norm": 0.3899230659008026, + "learning_rate": 2.4482223613075197e-05, + "loss": 0.05591583251953125, + "step": 4543 + }, + { + "epoch": 0.3071515479248344, + "grad_norm": 0.8011699914932251, + "learning_rate": 2.4479666751091923e-05, + "loss": 0.138580322265625, + "step": 4544 + }, + { + "epoch": 0.30721914289576857, + "grad_norm": 0.443808376789093, + "learning_rate": 2.447710943042342e-05, + "loss": 0.10736083984375, + "step": 4545 + }, + { + "epoch": 0.30728673786670274, + "grad_norm": 0.5109126567840576, + "learning_rate": 2.4474551651193418e-05, + "loss": 0.11028289794921875, + "step": 4546 + }, + { + "epoch": 0.30735433283763686, + "grad_norm": 0.9749040007591248, + "learning_rate": 2.447199341352569e-05, + "loss": 0.202239990234375, + "step": 4547 + }, + { + "epoch": 0.307421927808571, + "grad_norm": 0.6344107985496521, + "learning_rate": 2.446943471754401e-05, + "loss": 0.124359130859375, + "step": 4548 + }, + { + "epoch": 0.3074895227795052, + "grad_norm": 0.34498170018196106, + "learning_rate": 2.4466875563372194e-05, + "loss": 0.0787353515625, + "step": 4549 + }, + { + "epoch": 0.30755711775043937, + "grad_norm": 0.3829247057437897, + "learning_rate": 2.4464315951134065e-05, + "loss": 0.07310771942138672, + "step": 4550 + }, + { + "epoch": 0.30762471272137354, + "grad_norm": 0.7583673000335693, + "learning_rate": 2.4461755880953473e-05, + "loss": 0.14162445068359375, + "step": 4551 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 1.022658348083496, + "learning_rate": 2.4459195352954283e-05, + "loss": 0.168548583984375, + "step": 4552 + }, + { + "epoch": 0.3077599026632419, + "grad_norm": 0.6091347932815552, + "learning_rate": 2.44566343672604e-05, + "loss": 0.159881591796875, + "step": 4553 + }, + { + "epoch": 0.307827497634176, + "grad_norm": 0.35219645500183105, + "learning_rate": 2.4454072923995737e-05, + "loss": 0.041876792907714844, + "step": 4554 + }, + { + "epoch": 0.30789509260511017, + "grad_norm": 0.5338138341903687, + "learning_rate": 2.4451511023284232e-05, + "loss": 0.0945587158203125, + "step": 4555 + }, + { + "epoch": 0.30796268757604434, + "grad_norm": 0.43900421261787415, + "learning_rate": 2.444894866524984e-05, + "loss": 0.08090972900390625, + "step": 4556 + }, + { + "epoch": 0.3080302825469785, + "grad_norm": 1.2795053720474243, + "learning_rate": 2.4446385850016556e-05, + "loss": 0.1827392578125, + "step": 4557 + }, + { + "epoch": 0.3080978775179127, + "grad_norm": 0.7005778551101685, + "learning_rate": 2.4443822577708374e-05, + "loss": 0.1360321044921875, + "step": 4558 + }, + { + "epoch": 0.30816547248884685, + "grad_norm": 0.3042205274105072, + "learning_rate": 2.444125884844933e-05, + "loss": 0.0502166748046875, + "step": 4559 + }, + { + "epoch": 0.308233067459781, + "grad_norm": 0.6850687265396118, + "learning_rate": 2.4438694662363458e-05, + "loss": 0.1427459716796875, + "step": 4560 + }, + { + "epoch": 0.30830066243071513, + "grad_norm": 0.886519193649292, + "learning_rate": 2.4436130019574847e-05, + "loss": 0.1666259765625, + "step": 4561 + }, + { + "epoch": 0.3083682574016493, + "grad_norm": 0.5957180857658386, + "learning_rate": 2.4433564920207582e-05, + "loss": 0.107147216796875, + "step": 4562 + }, + { + "epoch": 0.3084358523725835, + "grad_norm": 0.8792882561683655, + "learning_rate": 2.4430999364385782e-05, + "loss": 0.17995452880859375, + "step": 4563 + }, + { + "epoch": 0.30850344734351764, + "grad_norm": 0.25457271933555603, + "learning_rate": 2.4428433352233577e-05, + "loss": 0.030025482177734375, + "step": 4564 + }, + { + "epoch": 0.3085710423144518, + "grad_norm": 0.8439216017723083, + "learning_rate": 2.442586688387514e-05, + "loss": 0.175048828125, + "step": 4565 + }, + { + "epoch": 0.308638637285386, + "grad_norm": 0.5600271224975586, + "learning_rate": 2.4423299959434637e-05, + "loss": 0.11712646484375, + "step": 4566 + }, + { + "epoch": 0.30870623225632016, + "grad_norm": 1.9945106506347656, + "learning_rate": 2.442073257903628e-05, + "loss": 0.26385498046875, + "step": 4567 + }, + { + "epoch": 0.30877382722725427, + "grad_norm": 0.8785309195518494, + "learning_rate": 2.4418164742804294e-05, + "loss": 0.215545654296875, + "step": 4568 + }, + { + "epoch": 0.30884142219818844, + "grad_norm": 0.9948610067367554, + "learning_rate": 2.441559645086293e-05, + "loss": 0.221466064453125, + "step": 4569 + }, + { + "epoch": 0.3089090171691226, + "grad_norm": 0.24929581582546234, + "learning_rate": 2.441302770333645e-05, + "loss": 0.04761505126953125, + "step": 4570 + }, + { + "epoch": 0.3089766121400568, + "grad_norm": 0.3542511761188507, + "learning_rate": 2.441045850034915e-05, + "loss": 0.08927154541015625, + "step": 4571 + }, + { + "epoch": 0.30904420711099095, + "grad_norm": 1.0908821821212769, + "learning_rate": 2.440788884202535e-05, + "loss": 0.1241607666015625, + "step": 4572 + }, + { + "epoch": 0.3091118020819251, + "grad_norm": 0.6007949113845825, + "learning_rate": 2.440531872848938e-05, + "loss": 0.12120819091796875, + "step": 4573 + }, + { + "epoch": 0.3091793970528593, + "grad_norm": 0.8365132212638855, + "learning_rate": 2.4402748159865594e-05, + "loss": 0.1263427734375, + "step": 4574 + }, + { + "epoch": 0.3092469920237934, + "grad_norm": 0.8254719972610474, + "learning_rate": 2.4400177136278383e-05, + "loss": 0.15362548828125, + "step": 4575 + }, + { + "epoch": 0.3093145869947276, + "grad_norm": 0.3651621639728546, + "learning_rate": 2.4397605657852142e-05, + "loss": 0.045074462890625, + "step": 4576 + }, + { + "epoch": 0.30938218196566175, + "grad_norm": 1.0712671279907227, + "learning_rate": 2.4395033724711293e-05, + "loss": 0.181060791015625, + "step": 4577 + }, + { + "epoch": 0.3094497769365959, + "grad_norm": 0.3909228444099426, + "learning_rate": 2.4392461336980285e-05, + "loss": 0.05933380126953125, + "step": 4578 + }, + { + "epoch": 0.3095173719075301, + "grad_norm": 0.7334007620811462, + "learning_rate": 2.438988849478359e-05, + "loss": 0.1322021484375, + "step": 4579 + }, + { + "epoch": 0.30958496687846426, + "grad_norm": 0.5114954113960266, + "learning_rate": 2.438731519824569e-05, + "loss": 0.08164596557617188, + "step": 4580 + }, + { + "epoch": 0.3096525618493984, + "grad_norm": 1.1756020784378052, + "learning_rate": 2.4384741447491104e-05, + "loss": 0.12688446044921875, + "step": 4581 + }, + { + "epoch": 0.30972015682033255, + "grad_norm": 1.4252568483352661, + "learning_rate": 2.4382167242644366e-05, + "loss": 0.199615478515625, + "step": 4582 + }, + { + "epoch": 0.3097877517912667, + "grad_norm": 0.548751175403595, + "learning_rate": 2.437959258383003e-05, + "loss": 0.091033935546875, + "step": 4583 + }, + { + "epoch": 0.3098553467622009, + "grad_norm": 0.510044276714325, + "learning_rate": 2.437701747117267e-05, + "loss": 0.08025360107421875, + "step": 4584 + }, + { + "epoch": 0.30992294173313506, + "grad_norm": 1.0664178133010864, + "learning_rate": 2.4374441904796888e-05, + "loss": 0.1266021728515625, + "step": 4585 + }, + { + "epoch": 0.30999053670406923, + "grad_norm": 0.30099382996559143, + "learning_rate": 2.4371865884827314e-05, + "loss": 0.04534149169921875, + "step": 4586 + }, + { + "epoch": 0.3100581316750034, + "grad_norm": 0.6645945906639099, + "learning_rate": 2.4369289411388582e-05, + "loss": 0.08623313903808594, + "step": 4587 + }, + { + "epoch": 0.3101257266459375, + "grad_norm": 0.9177051782608032, + "learning_rate": 2.4366712484605362e-05, + "loss": 0.1175537109375, + "step": 4588 + }, + { + "epoch": 0.3101933216168717, + "grad_norm": 0.2945314347743988, + "learning_rate": 2.436413510460234e-05, + "loss": 0.06110382080078125, + "step": 4589 + }, + { + "epoch": 0.31026091658780586, + "grad_norm": 0.8156729340553284, + "learning_rate": 2.4361557271504227e-05, + "loss": 0.1154022216796875, + "step": 4590 + }, + { + "epoch": 0.31032851155874003, + "grad_norm": 0.8873340487480164, + "learning_rate": 2.4358978985435757e-05, + "loss": 0.1365947723388672, + "step": 4591 + }, + { + "epoch": 0.3103961065296742, + "grad_norm": 0.24030618369579315, + "learning_rate": 2.4356400246521675e-05, + "loss": 0.05554962158203125, + "step": 4592 + }, + { + "epoch": 0.31046370150060837, + "grad_norm": 0.3970939517021179, + "learning_rate": 2.435382105488677e-05, + "loss": 0.0993804931640625, + "step": 4593 + }, + { + "epoch": 0.31053129647154254, + "grad_norm": 0.31118085980415344, + "learning_rate": 2.4351241410655827e-05, + "loss": 0.022809982299804688, + "step": 4594 + }, + { + "epoch": 0.31059889144247665, + "grad_norm": 0.39093345403671265, + "learning_rate": 2.434866131395367e-05, + "loss": 0.07266998291015625, + "step": 4595 + }, + { + "epoch": 0.3106664864134108, + "grad_norm": 0.22971662878990173, + "learning_rate": 2.4346080764905137e-05, + "loss": 0.0399322509765625, + "step": 4596 + }, + { + "epoch": 0.310734081384345, + "grad_norm": 0.4084847867488861, + "learning_rate": 2.43434997636351e-05, + "loss": 0.08570671081542969, + "step": 4597 + }, + { + "epoch": 0.31080167635527917, + "grad_norm": 0.45570841431617737, + "learning_rate": 2.4340918310268435e-05, + "loss": 0.07729339599609375, + "step": 4598 + }, + { + "epoch": 0.31086927132621334, + "grad_norm": 0.60152667760849, + "learning_rate": 2.433833640493005e-05, + "loss": 0.121063232421875, + "step": 4599 + }, + { + "epoch": 0.3109368662971475, + "grad_norm": 1.240374207496643, + "learning_rate": 2.4335754047744875e-05, + "loss": 0.1703338623046875, + "step": 4600 + }, + { + "epoch": 0.3110044612680817, + "grad_norm": 0.36600902676582336, + "learning_rate": 2.4333171238837863e-05, + "loss": 0.075042724609375, + "step": 4601 + }, + { + "epoch": 0.3110720562390158, + "grad_norm": 1.0226103067398071, + "learning_rate": 2.4330587978333986e-05, + "loss": 0.14235687255859375, + "step": 4602 + }, + { + "epoch": 0.31113965120994996, + "grad_norm": 1.139746904373169, + "learning_rate": 2.4328004266358234e-05, + "loss": 0.23895263671875, + "step": 4603 + }, + { + "epoch": 0.31120724618088413, + "grad_norm": 0.4671783745288849, + "learning_rate": 2.4325420103035626e-05, + "loss": 0.073944091796875, + "step": 4604 + }, + { + "epoch": 0.3112748411518183, + "grad_norm": 0.3119015395641327, + "learning_rate": 2.4322835488491198e-05, + "loss": 0.0422515869140625, + "step": 4605 + }, + { + "epoch": 0.3113424361227525, + "grad_norm": 0.3453280031681061, + "learning_rate": 2.432025042285001e-05, + "loss": 0.05310821533203125, + "step": 4606 + }, + { + "epoch": 0.31141003109368665, + "grad_norm": 0.619429349899292, + "learning_rate": 2.4317664906237146e-05, + "loss": 0.13048553466796875, + "step": 4607 + }, + { + "epoch": 0.3114776260646208, + "grad_norm": 0.756438672542572, + "learning_rate": 2.431507893877771e-05, + "loss": 0.12229537963867188, + "step": 4608 + }, + { + "epoch": 0.31154522103555493, + "grad_norm": 0.37638312578201294, + "learning_rate": 2.431249252059682e-05, + "loss": 0.091949462890625, + "step": 4609 + }, + { + "epoch": 0.3116128160064891, + "grad_norm": 0.5177965760231018, + "learning_rate": 2.430990565181963e-05, + "loss": 0.114715576171875, + "step": 4610 + }, + { + "epoch": 0.3116804109774233, + "grad_norm": 0.5055484175682068, + "learning_rate": 2.430731833257131e-05, + "loss": 0.06444549560546875, + "step": 4611 + }, + { + "epoch": 0.31174800594835744, + "grad_norm": 0.7846696376800537, + "learning_rate": 2.4304730562977047e-05, + "loss": 0.11698150634765625, + "step": 4612 + }, + { + "epoch": 0.3118156009192916, + "grad_norm": 0.8742433190345764, + "learning_rate": 2.4302142343162053e-05, + "loss": 0.12086105346679688, + "step": 4613 + }, + { + "epoch": 0.3118831958902258, + "grad_norm": 1.1715809106826782, + "learning_rate": 2.4299553673251563e-05, + "loss": 0.195098876953125, + "step": 4614 + }, + { + "epoch": 0.31195079086115995, + "grad_norm": 0.7326221466064453, + "learning_rate": 2.4296964553370832e-05, + "loss": 0.1078643798828125, + "step": 4615 + }, + { + "epoch": 0.31201838583209407, + "grad_norm": 1.0294333696365356, + "learning_rate": 2.429437498364514e-05, + "loss": 0.16705322265625, + "step": 4616 + }, + { + "epoch": 0.31208598080302824, + "grad_norm": 0.6189019680023193, + "learning_rate": 2.4291784964199775e-05, + "loss": 0.1200408935546875, + "step": 4617 + }, + { + "epoch": 0.3121535757739624, + "grad_norm": 0.39964574575424194, + "learning_rate": 2.428919449516008e-05, + "loss": 0.039886474609375, + "step": 4618 + }, + { + "epoch": 0.3122211707448966, + "grad_norm": 0.6332882642745972, + "learning_rate": 2.4286603576651388e-05, + "loss": 0.1294097900390625, + "step": 4619 + }, + { + "epoch": 0.31228876571583075, + "grad_norm": 0.6891769170761108, + "learning_rate": 2.4284012208799055e-05, + "loss": 0.16241455078125, + "step": 4620 + }, + { + "epoch": 0.3123563606867649, + "grad_norm": 1.0633373260498047, + "learning_rate": 2.4281420391728478e-05, + "loss": 0.17420196533203125, + "step": 4621 + }, + { + "epoch": 0.3124239556576991, + "grad_norm": 0.5502949357032776, + "learning_rate": 2.4278828125565067e-05, + "loss": 0.13177490234375, + "step": 4622 + }, + { + "epoch": 0.3124915506286332, + "grad_norm": 1.342261552810669, + "learning_rate": 2.427623541043424e-05, + "loss": 0.287109375, + "step": 4623 + }, + { + "epoch": 0.3125591455995674, + "grad_norm": 1.1539509296417236, + "learning_rate": 2.4273642246461457e-05, + "loss": 0.11824798583984375, + "step": 4624 + }, + { + "epoch": 0.31262674057050155, + "grad_norm": 1.028615117073059, + "learning_rate": 2.427104863377219e-05, + "loss": 0.219390869140625, + "step": 4625 + }, + { + "epoch": 0.3126943355414357, + "grad_norm": 0.9417972564697266, + "learning_rate": 2.4268454572491934e-05, + "loss": 0.15641021728515625, + "step": 4626 + }, + { + "epoch": 0.3127619305123699, + "grad_norm": 0.43406346440315247, + "learning_rate": 2.4265860062746203e-05, + "loss": 0.1306610107421875, + "step": 4627 + }, + { + "epoch": 0.31282952548330406, + "grad_norm": 0.38827261328697205, + "learning_rate": 2.4263265104660542e-05, + "loss": 0.06865310668945312, + "step": 4628 + }, + { + "epoch": 0.31289712045423823, + "grad_norm": 0.6804010272026062, + "learning_rate": 2.4260669698360506e-05, + "loss": 0.1055755615234375, + "step": 4629 + }, + { + "epoch": 0.31296471542517235, + "grad_norm": 0.46175581216812134, + "learning_rate": 2.4258073843971686e-05, + "loss": 0.124664306640625, + "step": 4630 + }, + { + "epoch": 0.3130323103961065, + "grad_norm": 0.5538236498832703, + "learning_rate": 2.425547754161967e-05, + "loss": 0.086181640625, + "step": 4631 + }, + { + "epoch": 0.3130999053670407, + "grad_norm": 0.6153519749641418, + "learning_rate": 2.425288079143009e-05, + "loss": 0.150909423828125, + "step": 4632 + }, + { + "epoch": 0.31316750033797486, + "grad_norm": 0.6308303475379944, + "learning_rate": 2.42502835935286e-05, + "loss": 0.12387847900390625, + "step": 4633 + }, + { + "epoch": 0.31323509530890903, + "grad_norm": 0.681050717830658, + "learning_rate": 2.424768594804086e-05, + "loss": 0.09000015258789062, + "step": 4634 + }, + { + "epoch": 0.3133026902798432, + "grad_norm": 1.0604764223098755, + "learning_rate": 2.424508785509256e-05, + "loss": 0.13135528564453125, + "step": 4635 + }, + { + "epoch": 0.31337028525077737, + "grad_norm": 0.5083533525466919, + "learning_rate": 2.424248931480942e-05, + "loss": 0.1285572052001953, + "step": 4636 + }, + { + "epoch": 0.3134378802217115, + "grad_norm": 0.4196830689907074, + "learning_rate": 2.4239890327317167e-05, + "loss": 0.0942535400390625, + "step": 4637 + }, + { + "epoch": 0.31350547519264566, + "grad_norm": 0.6333131194114685, + "learning_rate": 2.4237290892741557e-05, + "loss": 0.13934326171875, + "step": 4638 + }, + { + "epoch": 0.3135730701635798, + "grad_norm": 0.6168698072433472, + "learning_rate": 2.423469101120837e-05, + "loss": 0.118011474609375, + "step": 4639 + }, + { + "epoch": 0.313640665134514, + "grad_norm": 0.7230620980262756, + "learning_rate": 2.4232090682843397e-05, + "loss": 0.12034225463867188, + "step": 4640 + }, + { + "epoch": 0.31370826010544817, + "grad_norm": 0.40785104036331177, + "learning_rate": 2.4229489907772466e-05, + "loss": 0.08175086975097656, + "step": 4641 + }, + { + "epoch": 0.31377585507638234, + "grad_norm": 0.3910507261753082, + "learning_rate": 2.4226888686121414e-05, + "loss": 0.06560516357421875, + "step": 4642 + }, + { + "epoch": 0.31384345004731645, + "grad_norm": 0.44702383875846863, + "learning_rate": 2.4224287018016112e-05, + "loss": 0.0814361572265625, + "step": 4643 + }, + { + "epoch": 0.3139110450182506, + "grad_norm": 1.4027104377746582, + "learning_rate": 2.4221684903582434e-05, + "loss": 0.13671875, + "step": 4644 + }, + { + "epoch": 0.3139786399891848, + "grad_norm": 0.6988498568534851, + "learning_rate": 2.4219082342946294e-05, + "loss": 0.164215087890625, + "step": 4645 + }, + { + "epoch": 0.31404623496011896, + "grad_norm": 0.6646261811256409, + "learning_rate": 2.4216479336233615e-05, + "loss": 0.1129302978515625, + "step": 4646 + }, + { + "epoch": 0.31411382993105313, + "grad_norm": 0.8995798826217651, + "learning_rate": 2.4213875883570347e-05, + "loss": 0.20050048828125, + "step": 4647 + }, + { + "epoch": 0.3141814249019873, + "grad_norm": 0.5573684573173523, + "learning_rate": 2.421127198508247e-05, + "loss": 0.11994171142578125, + "step": 4648 + }, + { + "epoch": 0.3142490198729215, + "grad_norm": 1.984899640083313, + "learning_rate": 2.420866764089596e-05, + "loss": 0.293792724609375, + "step": 4649 + }, + { + "epoch": 0.3143166148438556, + "grad_norm": 0.49040624499320984, + "learning_rate": 2.4206062851136847e-05, + "loss": 0.11989593505859375, + "step": 4650 + }, + { + "epoch": 0.31438420981478976, + "grad_norm": 0.44107306003570557, + "learning_rate": 2.420345761593116e-05, + "loss": 0.0673370361328125, + "step": 4651 + }, + { + "epoch": 0.31445180478572393, + "grad_norm": 0.35707053542137146, + "learning_rate": 2.4200851935404962e-05, + "loss": 0.080780029296875, + "step": 4652 + }, + { + "epoch": 0.3145193997566581, + "grad_norm": 0.5534530282020569, + "learning_rate": 2.4198245809684325e-05, + "loss": 0.1159820556640625, + "step": 4653 + }, + { + "epoch": 0.3145869947275923, + "grad_norm": 0.21929728984832764, + "learning_rate": 2.4195639238895352e-05, + "loss": 0.0431976318359375, + "step": 4654 + }, + { + "epoch": 0.31465458969852644, + "grad_norm": 0.7576762437820435, + "learning_rate": 2.4193032223164163e-05, + "loss": 0.1687469482421875, + "step": 4655 + }, + { + "epoch": 0.3147221846694606, + "grad_norm": 0.6930139660835266, + "learning_rate": 2.4190424762616912e-05, + "loss": 0.10445404052734375, + "step": 4656 + }, + { + "epoch": 0.31478977964039473, + "grad_norm": 0.40393590927124023, + "learning_rate": 2.4187816857379752e-05, + "loss": 0.07970428466796875, + "step": 4657 + }, + { + "epoch": 0.3148573746113289, + "grad_norm": 0.7067824006080627, + "learning_rate": 2.4185208507578875e-05, + "loss": 0.15416717529296875, + "step": 4658 + }, + { + "epoch": 0.31492496958226307, + "grad_norm": 0.5970788598060608, + "learning_rate": 2.4182599713340484e-05, + "loss": 0.07037353515625, + "step": 4659 + }, + { + "epoch": 0.31499256455319724, + "grad_norm": 0.34981241822242737, + "learning_rate": 2.417999047479082e-05, + "loss": 0.05123138427734375, + "step": 4660 + }, + { + "epoch": 0.3150601595241314, + "grad_norm": 0.36011892557144165, + "learning_rate": 2.417738079205612e-05, + "loss": 0.0809478759765625, + "step": 4661 + }, + { + "epoch": 0.3151277544950656, + "grad_norm": 0.7485223412513733, + "learning_rate": 2.4174770665262667e-05, + "loss": 0.1513671875, + "step": 4662 + }, + { + "epoch": 0.31519534946599975, + "grad_norm": 0.9962133169174194, + "learning_rate": 2.417216009453675e-05, + "loss": 0.173431396484375, + "step": 4663 + }, + { + "epoch": 0.31526294443693387, + "grad_norm": 0.7343834638595581, + "learning_rate": 2.4169549080004688e-05, + "loss": 0.167083740234375, + "step": 4664 + }, + { + "epoch": 0.31533053940786804, + "grad_norm": 0.7545181512832642, + "learning_rate": 2.4166937621792818e-05, + "loss": 0.13394927978515625, + "step": 4665 + }, + { + "epoch": 0.3153981343788022, + "grad_norm": 1.1269789934158325, + "learning_rate": 2.4164325720027492e-05, + "loss": 0.210906982421875, + "step": 4666 + }, + { + "epoch": 0.3154657293497364, + "grad_norm": 0.1636061668395996, + "learning_rate": 2.4161713374835103e-05, + "loss": 0.029895782470703125, + "step": 4667 + }, + { + "epoch": 0.31553332432067055, + "grad_norm": 0.45729777216911316, + "learning_rate": 2.4159100586342044e-05, + "loss": 0.06752777099609375, + "step": 4668 + }, + { + "epoch": 0.3156009192916047, + "grad_norm": 1.4275202751159668, + "learning_rate": 2.4156487354674734e-05, + "loss": 0.244720458984375, + "step": 4669 + }, + { + "epoch": 0.3156685142625389, + "grad_norm": 0.23011702299118042, + "learning_rate": 2.4153873679959624e-05, + "loss": 0.038890838623046875, + "step": 4670 + }, + { + "epoch": 0.315736109233473, + "grad_norm": 0.43191489577293396, + "learning_rate": 2.415125956232318e-05, + "loss": 0.072784423828125, + "step": 4671 + }, + { + "epoch": 0.3158037042044072, + "grad_norm": 0.19401925802230835, + "learning_rate": 2.4148645001891883e-05, + "loss": 0.043209075927734375, + "step": 4672 + }, + { + "epoch": 0.31587129917534135, + "grad_norm": 1.611680507659912, + "learning_rate": 2.414602999879225e-05, + "loss": 0.1878662109375, + "step": 4673 + }, + { + "epoch": 0.3159388941462755, + "grad_norm": 0.5908228754997253, + "learning_rate": 2.4143414553150805e-05, + "loss": 0.108062744140625, + "step": 4674 + }, + { + "epoch": 0.3160064891172097, + "grad_norm": 1.0670899152755737, + "learning_rate": 2.41407986650941e-05, + "loss": 0.141265869140625, + "step": 4675 + }, + { + "epoch": 0.31607408408814386, + "grad_norm": 0.500966489315033, + "learning_rate": 2.4138182334748715e-05, + "loss": 0.08770751953125, + "step": 4676 + }, + { + "epoch": 0.31614167905907803, + "grad_norm": 1.0990190505981445, + "learning_rate": 2.4135565562241233e-05, + "loss": 0.21038818359375, + "step": 4677 + }, + { + "epoch": 0.31620927403001214, + "grad_norm": 1.5013819932937622, + "learning_rate": 2.413294834769828e-05, + "loss": 0.14075851440429688, + "step": 4678 + }, + { + "epoch": 0.3162768690009463, + "grad_norm": 0.4017148017883301, + "learning_rate": 2.413033069124649e-05, + "loss": 0.06828689575195312, + "step": 4679 + }, + { + "epoch": 0.3163444639718805, + "grad_norm": 0.9955119490623474, + "learning_rate": 2.412771259301252e-05, + "loss": 0.12624359130859375, + "step": 4680 + }, + { + "epoch": 0.31641205894281466, + "grad_norm": 0.8472067713737488, + "learning_rate": 2.4125094053123044e-05, + "loss": 0.182342529296875, + "step": 4681 + }, + { + "epoch": 0.3164796539137488, + "grad_norm": 1.0041999816894531, + "learning_rate": 2.412247507170477e-05, + "loss": 0.1534576416015625, + "step": 4682 + }, + { + "epoch": 0.316547248884683, + "grad_norm": 1.0077601671218872, + "learning_rate": 2.411985564888443e-05, + "loss": 0.1826171875, + "step": 4683 + }, + { + "epoch": 0.31661484385561717, + "grad_norm": 0.6585657596588135, + "learning_rate": 2.4117235784788747e-05, + "loss": 0.14300537109375, + "step": 4684 + }, + { + "epoch": 0.3166824388265513, + "grad_norm": 0.6776173710823059, + "learning_rate": 2.41146154795445e-05, + "loss": 0.1085205078125, + "step": 4685 + }, + { + "epoch": 0.31675003379748545, + "grad_norm": 1.7003378868103027, + "learning_rate": 2.411199473327847e-05, + "loss": 0.2183380126953125, + "step": 4686 + }, + { + "epoch": 0.3168176287684196, + "grad_norm": 0.4910496473312378, + "learning_rate": 2.4109373546117475e-05, + "loss": 0.1225128173828125, + "step": 4687 + }, + { + "epoch": 0.3168852237393538, + "grad_norm": 0.598829448223114, + "learning_rate": 2.4106751918188333e-05, + "loss": 0.144866943359375, + "step": 4688 + }, + { + "epoch": 0.31695281871028796, + "grad_norm": 0.6127461194992065, + "learning_rate": 2.4104129849617903e-05, + "loss": 0.06015777587890625, + "step": 4689 + }, + { + "epoch": 0.31702041368122214, + "grad_norm": 0.3903936743736267, + "learning_rate": 2.4101507340533047e-05, + "loss": 0.090972900390625, + "step": 4690 + }, + { + "epoch": 0.3170880086521563, + "grad_norm": 0.9992654323577881, + "learning_rate": 2.409888439106067e-05, + "loss": 0.1563262939453125, + "step": 4691 + }, + { + "epoch": 0.3171556036230904, + "grad_norm": 0.42581337690353394, + "learning_rate": 2.4096261001327675e-05, + "loss": 0.102813720703125, + "step": 4692 + }, + { + "epoch": 0.3172231985940246, + "grad_norm": 0.25735053420066833, + "learning_rate": 2.409363717146101e-05, + "loss": 0.03485870361328125, + "step": 4693 + }, + { + "epoch": 0.31729079356495876, + "grad_norm": 0.5883010029792786, + "learning_rate": 2.4091012901587622e-05, + "loss": 0.1160430908203125, + "step": 4694 + }, + { + "epoch": 0.31735838853589293, + "grad_norm": 1.0870085954666138, + "learning_rate": 2.4088388191834492e-05, + "loss": 0.22039794921875, + "step": 4695 + }, + { + "epoch": 0.3174259835068271, + "grad_norm": 0.420332670211792, + "learning_rate": 2.4085763042328626e-05, + "loss": 0.06596660614013672, + "step": 4696 + }, + { + "epoch": 0.3174935784777613, + "grad_norm": 0.6428868770599365, + "learning_rate": 2.4083137453197038e-05, + "loss": 0.0973358154296875, + "step": 4697 + }, + { + "epoch": 0.31756117344869544, + "grad_norm": 1.4150190353393555, + "learning_rate": 2.408051142456677e-05, + "loss": 0.2420501708984375, + "step": 4698 + }, + { + "epoch": 0.31762876841962956, + "grad_norm": 1.063437581062317, + "learning_rate": 2.407788495656489e-05, + "loss": 0.2275543212890625, + "step": 4699 + }, + { + "epoch": 0.31769636339056373, + "grad_norm": 0.3634606599807739, + "learning_rate": 2.407525804931848e-05, + "loss": 0.06396484375, + "step": 4700 + }, + { + "epoch": 0.3177639583614979, + "grad_norm": 0.5835754871368408, + "learning_rate": 2.407263070295465e-05, + "loss": 0.12244796752929688, + "step": 4701 + }, + { + "epoch": 0.31783155333243207, + "grad_norm": 1.4793556928634644, + "learning_rate": 2.4070002917600523e-05, + "loss": 0.242218017578125, + "step": 4702 + }, + { + "epoch": 0.31789914830336624, + "grad_norm": 1.3198870420455933, + "learning_rate": 2.406737469338325e-05, + "loss": 0.2705078125, + "step": 4703 + }, + { + "epoch": 0.3179667432743004, + "grad_norm": 0.3393680453300476, + "learning_rate": 2.406474603043e-05, + "loss": 0.05538177490234375, + "step": 4704 + }, + { + "epoch": 0.3180343382452345, + "grad_norm": 1.6882096529006958, + "learning_rate": 2.4062116928867963e-05, + "loss": 0.25238037109375, + "step": 4705 + }, + { + "epoch": 0.3181019332161687, + "grad_norm": 0.23820768296718597, + "learning_rate": 2.4059487388824352e-05, + "loss": 0.03774261474609375, + "step": 4706 + }, + { + "epoch": 0.31816952818710287, + "grad_norm": 0.9069522619247437, + "learning_rate": 2.4056857410426402e-05, + "loss": 0.137847900390625, + "step": 4707 + }, + { + "epoch": 0.31823712315803704, + "grad_norm": 0.48094016313552856, + "learning_rate": 2.4054226993801363e-05, + "loss": 0.0938873291015625, + "step": 4708 + }, + { + "epoch": 0.3183047181289712, + "grad_norm": 0.4703005850315094, + "learning_rate": 2.4051596139076516e-05, + "loss": 0.10639190673828125, + "step": 4709 + }, + { + "epoch": 0.3183723130999054, + "grad_norm": 0.6142823100090027, + "learning_rate": 2.4048964846379154e-05, + "loss": 0.1168365478515625, + "step": 4710 + }, + { + "epoch": 0.31843990807083955, + "grad_norm": 0.3473421633243561, + "learning_rate": 2.4046333115836605e-05, + "loss": 0.0741729736328125, + "step": 4711 + }, + { + "epoch": 0.31850750304177367, + "grad_norm": 0.85907381772995, + "learning_rate": 2.40437009475762e-05, + "loss": 0.1646881103515625, + "step": 4712 + }, + { + "epoch": 0.31857509801270784, + "grad_norm": 1.4185293912887573, + "learning_rate": 2.4041068341725297e-05, + "loss": 0.1548004150390625, + "step": 4713 + }, + { + "epoch": 0.318642692983642, + "grad_norm": 0.6489579081535339, + "learning_rate": 2.4038435298411282e-05, + "loss": 0.09783935546875, + "step": 4714 + }, + { + "epoch": 0.3187102879545762, + "grad_norm": 0.9723486304283142, + "learning_rate": 2.4035801817761563e-05, + "loss": 0.139373779296875, + "step": 4715 + }, + { + "epoch": 0.31877788292551035, + "grad_norm": 1.142874002456665, + "learning_rate": 2.4033167899903553e-05, + "loss": 0.198822021484375, + "step": 4716 + }, + { + "epoch": 0.3188454778964445, + "grad_norm": 0.873059868812561, + "learning_rate": 2.4030533544964705e-05, + "loss": 0.143829345703125, + "step": 4717 + }, + { + "epoch": 0.3189130728673787, + "grad_norm": 0.461980938911438, + "learning_rate": 2.4027898753072482e-05, + "loss": 0.1027984619140625, + "step": 4718 + }, + { + "epoch": 0.3189806678383128, + "grad_norm": 0.2260478138923645, + "learning_rate": 2.4025263524354378e-05, + "loss": 0.04478645324707031, + "step": 4719 + }, + { + "epoch": 0.319048262809247, + "grad_norm": 0.3401228189468384, + "learning_rate": 2.4022627858937894e-05, + "loss": 0.053131103515625, + "step": 4720 + }, + { + "epoch": 0.31911585778018114, + "grad_norm": 0.587075412273407, + "learning_rate": 2.4019991756950562e-05, + "loss": 0.14153289794921875, + "step": 4721 + }, + { + "epoch": 0.3191834527511153, + "grad_norm": 0.2944251298904419, + "learning_rate": 2.401735521851994e-05, + "loss": 0.0635528564453125, + "step": 4722 + }, + { + "epoch": 0.3192510477220495, + "grad_norm": 0.7479535341262817, + "learning_rate": 2.401471824377359e-05, + "loss": 0.19830322265625, + "step": 4723 + }, + { + "epoch": 0.31931864269298366, + "grad_norm": 0.5534106492996216, + "learning_rate": 2.401208083283911e-05, + "loss": 0.09630966186523438, + "step": 4724 + }, + { + "epoch": 0.3193862376639178, + "grad_norm": 0.6888276934623718, + "learning_rate": 2.4009442985844113e-05, + "loss": 0.12709808349609375, + "step": 4725 + }, + { + "epoch": 0.31945383263485194, + "grad_norm": 1.2136468887329102, + "learning_rate": 2.4006804702916237e-05, + "loss": 0.18259429931640625, + "step": 4726 + }, + { + "epoch": 0.3195214276057861, + "grad_norm": 1.726121425628662, + "learning_rate": 2.4004165984183137e-05, + "loss": 0.1781158447265625, + "step": 4727 + }, + { + "epoch": 0.3195890225767203, + "grad_norm": 0.3999043107032776, + "learning_rate": 2.4001526829772488e-05, + "loss": 0.061798095703125, + "step": 4728 + }, + { + "epoch": 0.31965661754765445, + "grad_norm": 0.6649033427238464, + "learning_rate": 2.399888723981199e-05, + "loss": 0.174163818359375, + "step": 4729 + }, + { + "epoch": 0.3197242125185886, + "grad_norm": 0.7995035648345947, + "learning_rate": 2.3996247214429366e-05, + "loss": 0.17584228515625, + "step": 4730 + }, + { + "epoch": 0.3197918074895228, + "grad_norm": 0.5842522978782654, + "learning_rate": 2.3993606753752356e-05, + "loss": 0.10699844360351562, + "step": 4731 + }, + { + "epoch": 0.31985940246045697, + "grad_norm": 0.587932825088501, + "learning_rate": 2.3990965857908717e-05, + "loss": 0.149139404296875, + "step": 4732 + }, + { + "epoch": 0.3199269974313911, + "grad_norm": 0.3078608810901642, + "learning_rate": 2.398832452702624e-05, + "loss": 0.05637359619140625, + "step": 4733 + }, + { + "epoch": 0.31999459240232525, + "grad_norm": 0.49529871344566345, + "learning_rate": 2.398568276123272e-05, + "loss": 0.0955047607421875, + "step": 4734 + }, + { + "epoch": 0.3200621873732594, + "grad_norm": 0.7332590222358704, + "learning_rate": 2.398304056065599e-05, + "loss": 0.11359024047851562, + "step": 4735 + }, + { + "epoch": 0.3201297823441936, + "grad_norm": 0.3709087371826172, + "learning_rate": 2.3980397925423888e-05, + "loss": 0.07599639892578125, + "step": 4736 + }, + { + "epoch": 0.32019737731512776, + "grad_norm": 0.33911997079849243, + "learning_rate": 2.3977754855664295e-05, + "loss": 0.06069183349609375, + "step": 4737 + }, + { + "epoch": 0.32026497228606193, + "grad_norm": 0.6031802296638489, + "learning_rate": 2.397511135150508e-05, + "loss": 0.13504791259765625, + "step": 4738 + }, + { + "epoch": 0.3203325672569961, + "grad_norm": 1.3953282833099365, + "learning_rate": 2.397246741307417e-05, + "loss": 0.14064788818359375, + "step": 4739 + }, + { + "epoch": 0.3204001622279302, + "grad_norm": 0.24348212778568268, + "learning_rate": 2.3969823040499486e-05, + "loss": 0.042583465576171875, + "step": 4740 + }, + { + "epoch": 0.3204677571988644, + "grad_norm": 0.5169712901115417, + "learning_rate": 2.3967178233908978e-05, + "loss": 0.1230010986328125, + "step": 4741 + }, + { + "epoch": 0.32053535216979856, + "grad_norm": 0.4177095890045166, + "learning_rate": 2.396453299343062e-05, + "loss": 0.08415603637695312, + "step": 4742 + }, + { + "epoch": 0.32060294714073273, + "grad_norm": 0.9050095081329346, + "learning_rate": 2.396188731919241e-05, + "loss": 0.12965965270996094, + "step": 4743 + }, + { + "epoch": 0.3206705421116669, + "grad_norm": 0.30175918340682983, + "learning_rate": 2.3959241211322355e-05, + "loss": 0.05268096923828125, + "step": 4744 + }, + { + "epoch": 0.32073813708260107, + "grad_norm": 0.8613353967666626, + "learning_rate": 2.395659466994849e-05, + "loss": 0.1584014892578125, + "step": 4745 + }, + { + "epoch": 0.32080573205353524, + "grad_norm": 0.3886498808860779, + "learning_rate": 2.395394769519888e-05, + "loss": 0.089752197265625, + "step": 4746 + }, + { + "epoch": 0.32087332702446936, + "grad_norm": 1.3049119710922241, + "learning_rate": 2.3951300287201597e-05, + "loss": 0.2063751220703125, + "step": 4747 + }, + { + "epoch": 0.3209409219954035, + "grad_norm": 0.7128794193267822, + "learning_rate": 2.3948652446084733e-05, + "loss": 0.11474227905273438, + "step": 4748 + }, + { + "epoch": 0.3210085169663377, + "grad_norm": 0.40000587701797485, + "learning_rate": 2.3946004171976415e-05, + "loss": 0.0846710205078125, + "step": 4749 + }, + { + "epoch": 0.32107611193727187, + "grad_norm": 0.3005344569683075, + "learning_rate": 2.3943355465004786e-05, + "loss": 0.05205535888671875, + "step": 4750 + }, + { + "epoch": 0.32114370690820604, + "grad_norm": 0.5034216642379761, + "learning_rate": 2.3940706325297995e-05, + "loss": 0.09320831298828125, + "step": 4751 + }, + { + "epoch": 0.3212113018791402, + "grad_norm": 0.3190695345401764, + "learning_rate": 2.3938056752984233e-05, + "loss": 0.0469818115234375, + "step": 4752 + }, + { + "epoch": 0.3212788968500744, + "grad_norm": 0.6189470887184143, + "learning_rate": 2.3935406748191698e-05, + "loss": 0.1307373046875, + "step": 4753 + }, + { + "epoch": 0.3213464918210085, + "grad_norm": 0.30929869413375854, + "learning_rate": 2.3932756311048617e-05, + "loss": 0.041614532470703125, + "step": 4754 + }, + { + "epoch": 0.32141408679194267, + "grad_norm": 0.36720725893974304, + "learning_rate": 2.3930105441683233e-05, + "loss": 0.07747650146484375, + "step": 4755 + }, + { + "epoch": 0.32148168176287684, + "grad_norm": 0.4717094302177429, + "learning_rate": 2.3927454140223816e-05, + "loss": 0.09189605712890625, + "step": 4756 + }, + { + "epoch": 0.321549276733811, + "grad_norm": 0.9826074838638306, + "learning_rate": 2.3924802406798642e-05, + "loss": 0.11522674560546875, + "step": 4757 + }, + { + "epoch": 0.3216168717047452, + "grad_norm": 0.432442307472229, + "learning_rate": 2.392215024153603e-05, + "loss": 0.0894775390625, + "step": 4758 + }, + { + "epoch": 0.32168446667567935, + "grad_norm": 0.9033101797103882, + "learning_rate": 2.3919497644564302e-05, + "loss": 0.210601806640625, + "step": 4759 + }, + { + "epoch": 0.3217520616466135, + "grad_norm": 1.0834906101226807, + "learning_rate": 2.3916844616011804e-05, + "loss": 0.1934814453125, + "step": 4760 + }, + { + "epoch": 0.32181965661754763, + "grad_norm": 0.9890915155410767, + "learning_rate": 2.3914191156006915e-05, + "loss": 0.174285888671875, + "step": 4761 + }, + { + "epoch": 0.3218872515884818, + "grad_norm": 0.7679158449172974, + "learning_rate": 2.3911537264678017e-05, + "loss": 0.19891357421875, + "step": 4762 + }, + { + "epoch": 0.321954846559416, + "grad_norm": 0.5633974075317383, + "learning_rate": 2.390888294215353e-05, + "loss": 0.150146484375, + "step": 4763 + }, + { + "epoch": 0.32202244153035015, + "grad_norm": 0.7874541282653809, + "learning_rate": 2.3906228188561882e-05, + "loss": 0.162750244140625, + "step": 4764 + }, + { + "epoch": 0.3220900365012843, + "grad_norm": 0.5599070191383362, + "learning_rate": 2.3903573004031527e-05, + "loss": 0.1052703857421875, + "step": 4765 + }, + { + "epoch": 0.3221576314722185, + "grad_norm": 0.3950929045677185, + "learning_rate": 2.390091738869094e-05, + "loss": 0.045040130615234375, + "step": 4766 + }, + { + "epoch": 0.32222522644315266, + "grad_norm": 0.24468356370925903, + "learning_rate": 2.3898261342668613e-05, + "loss": 0.0430908203125, + "step": 4767 + }, + { + "epoch": 0.32229282141408677, + "grad_norm": 0.4781317412853241, + "learning_rate": 2.3895604866093062e-05, + "loss": 0.046497344970703125, + "step": 4768 + }, + { + "epoch": 0.32236041638502094, + "grad_norm": 0.4440094232559204, + "learning_rate": 2.389294795909283e-05, + "loss": 0.0743408203125, + "step": 4769 + }, + { + "epoch": 0.3224280113559551, + "grad_norm": 0.898849606513977, + "learning_rate": 2.3890290621796468e-05, + "loss": 0.13457107543945312, + "step": 4770 + }, + { + "epoch": 0.3224956063268893, + "grad_norm": 0.607646644115448, + "learning_rate": 2.3887632854332564e-05, + "loss": 0.12682342529296875, + "step": 4771 + }, + { + "epoch": 0.32256320129782345, + "grad_norm": 0.30678579211235046, + "learning_rate": 2.3884974656829705e-05, + "loss": 0.05304718017578125, + "step": 4772 + }, + { + "epoch": 0.3226307962687576, + "grad_norm": 0.9588409662246704, + "learning_rate": 2.388231602941652e-05, + "loss": 0.09381484985351562, + "step": 4773 + }, + { + "epoch": 0.32269839123969174, + "grad_norm": 0.949133574962616, + "learning_rate": 2.3879656972221646e-05, + "loss": 0.18304443359375, + "step": 4774 + }, + { + "epoch": 0.3227659862106259, + "grad_norm": 0.4105973243713379, + "learning_rate": 2.387699748537375e-05, + "loss": 0.07736587524414062, + "step": 4775 + }, + { + "epoch": 0.3228335811815601, + "grad_norm": 2.839353322982788, + "learning_rate": 2.3874337569001505e-05, + "loss": 0.17036819458007812, + "step": 4776 + }, + { + "epoch": 0.32290117615249425, + "grad_norm": 0.4992651641368866, + "learning_rate": 2.387167722323362e-05, + "loss": 0.09236907958984375, + "step": 4777 + }, + { + "epoch": 0.3229687711234284, + "grad_norm": 0.39772170782089233, + "learning_rate": 2.386901644819882e-05, + "loss": 0.06484222412109375, + "step": 4778 + }, + { + "epoch": 0.3230363660943626, + "grad_norm": 0.7268570065498352, + "learning_rate": 2.3866355244025844e-05, + "loss": 0.142303466796875, + "step": 4779 + }, + { + "epoch": 0.32310396106529676, + "grad_norm": 0.8557929396629333, + "learning_rate": 2.386369361084347e-05, + "loss": 0.190460205078125, + "step": 4780 + }, + { + "epoch": 0.3231715560362309, + "grad_norm": 1.207416296005249, + "learning_rate": 2.3861031548780472e-05, + "loss": 0.1481170654296875, + "step": 4781 + }, + { + "epoch": 0.32323915100716505, + "grad_norm": 1.3938488960266113, + "learning_rate": 2.385836905796566e-05, + "loss": 0.158416748046875, + "step": 4782 + }, + { + "epoch": 0.3233067459780992, + "grad_norm": 0.44224533438682556, + "learning_rate": 2.3855706138527863e-05, + "loss": 0.07365036010742188, + "step": 4783 + }, + { + "epoch": 0.3233743409490334, + "grad_norm": 0.30815285444259644, + "learning_rate": 2.385304279059593e-05, + "loss": 0.072479248046875, + "step": 4784 + }, + { + "epoch": 0.32344193591996756, + "grad_norm": 0.3178345859050751, + "learning_rate": 2.3850379014298737e-05, + "loss": 0.0633087158203125, + "step": 4785 + }, + { + "epoch": 0.32350953089090173, + "grad_norm": 0.5003637075424194, + "learning_rate": 2.3847714809765165e-05, + "loss": 0.0779876708984375, + "step": 4786 + }, + { + "epoch": 0.3235771258618359, + "grad_norm": 0.8197936415672302, + "learning_rate": 2.384505017712412e-05, + "loss": 0.2159423828125, + "step": 4787 + }, + { + "epoch": 0.32364472083277, + "grad_norm": 1.0344905853271484, + "learning_rate": 2.3842385116504547e-05, + "loss": 0.1725616455078125, + "step": 4788 + }, + { + "epoch": 0.3237123158037042, + "grad_norm": 0.6253617405891418, + "learning_rate": 2.383971962803539e-05, + "loss": 0.11090850830078125, + "step": 4789 + }, + { + "epoch": 0.32377991077463836, + "grad_norm": 1.2560008764266968, + "learning_rate": 2.3837053711845626e-05, + "loss": 0.1831817626953125, + "step": 4790 + }, + { + "epoch": 0.32384750574557253, + "grad_norm": 0.4960574209690094, + "learning_rate": 2.3834387368064243e-05, + "loss": 0.1039886474609375, + "step": 4791 + }, + { + "epoch": 0.3239151007165067, + "grad_norm": 0.4539813995361328, + "learning_rate": 2.383172059682026e-05, + "loss": 0.0708160400390625, + "step": 4792 + }, + { + "epoch": 0.32398269568744087, + "grad_norm": 2.1298224925994873, + "learning_rate": 2.3829053398242708e-05, + "loss": 0.2186126708984375, + "step": 4793 + }, + { + "epoch": 0.32405029065837504, + "grad_norm": 0.7559239268302917, + "learning_rate": 2.382638577246065e-05, + "loss": 0.18646240234375, + "step": 4794 + }, + { + "epoch": 0.32411788562930915, + "grad_norm": 0.8537335991859436, + "learning_rate": 2.3823717719603156e-05, + "loss": 0.181884765625, + "step": 4795 + }, + { + "epoch": 0.3241854806002433, + "grad_norm": 0.3838200569152832, + "learning_rate": 2.3821049239799325e-05, + "loss": 0.10321044921875, + "step": 4796 + }, + { + "epoch": 0.3242530755711775, + "grad_norm": 0.6001983880996704, + "learning_rate": 2.3818380333178273e-05, + "loss": 0.0851898193359375, + "step": 4797 + }, + { + "epoch": 0.32432067054211167, + "grad_norm": 1.203104019165039, + "learning_rate": 2.3815710999869143e-05, + "loss": 0.19940185546875, + "step": 4798 + }, + { + "epoch": 0.32438826551304584, + "grad_norm": 0.361698180437088, + "learning_rate": 2.3813041240001083e-05, + "loss": 0.07461166381835938, + "step": 4799 + }, + { + "epoch": 0.32445586048398, + "grad_norm": 0.6172600984573364, + "learning_rate": 2.3810371053703287e-05, + "loss": 0.1209564208984375, + "step": 4800 + }, + { + "epoch": 0.3245234554549142, + "grad_norm": 0.6087688207626343, + "learning_rate": 2.3807700441104946e-05, + "loss": 0.1306304931640625, + "step": 4801 + }, + { + "epoch": 0.3245910504258483, + "grad_norm": 0.42176032066345215, + "learning_rate": 2.3805029402335282e-05, + "loss": 0.06329345703125, + "step": 4802 + }, + { + "epoch": 0.32465864539678246, + "grad_norm": 0.440811425447464, + "learning_rate": 2.3802357937523537e-05, + "loss": 0.06177520751953125, + "step": 4803 + }, + { + "epoch": 0.32472624036771663, + "grad_norm": 0.5852611660957336, + "learning_rate": 2.379968604679898e-05, + "loss": 0.11841583251953125, + "step": 4804 + }, + { + "epoch": 0.3247938353386508, + "grad_norm": 0.6504207849502563, + "learning_rate": 2.379701373029088e-05, + "loss": 0.12137603759765625, + "step": 4805 + }, + { + "epoch": 0.324861430309585, + "grad_norm": 1.0500446557998657, + "learning_rate": 2.379434098812855e-05, + "loss": 0.14189910888671875, + "step": 4806 + }, + { + "epoch": 0.32492902528051915, + "grad_norm": 0.39406657218933105, + "learning_rate": 2.379166782044131e-05, + "loss": 0.06763458251953125, + "step": 4807 + }, + { + "epoch": 0.3249966202514533, + "grad_norm": 0.5753778219223022, + "learning_rate": 2.3788994227358513e-05, + "loss": 0.115020751953125, + "step": 4808 + }, + { + "epoch": 0.32506421522238743, + "grad_norm": 1.1524618864059448, + "learning_rate": 2.3786320209009514e-05, + "loss": 0.186859130859375, + "step": 4809 + }, + { + "epoch": 0.3251318101933216, + "grad_norm": 0.6755850315093994, + "learning_rate": 2.3783645765523702e-05, + "loss": 0.129364013671875, + "step": 4810 + }, + { + "epoch": 0.3251994051642558, + "grad_norm": 0.2675783634185791, + "learning_rate": 2.378097089703048e-05, + "loss": 0.04927825927734375, + "step": 4811 + }, + { + "epoch": 0.32526700013518994, + "grad_norm": 0.6978342533111572, + "learning_rate": 2.3778295603659275e-05, + "loss": 0.181640625, + "step": 4812 + }, + { + "epoch": 0.3253345951061241, + "grad_norm": 0.5574337244033813, + "learning_rate": 2.377561988553954e-05, + "loss": 0.1451568603515625, + "step": 4813 + }, + { + "epoch": 0.3254021900770583, + "grad_norm": 0.5200694799423218, + "learning_rate": 2.377294374280074e-05, + "loss": 0.09759521484375, + "step": 4814 + }, + { + "epoch": 0.32546978504799245, + "grad_norm": 1.1279646158218384, + "learning_rate": 2.3770267175572367e-05, + "loss": 0.210479736328125, + "step": 4815 + }, + { + "epoch": 0.32553738001892657, + "grad_norm": 1.1923248767852783, + "learning_rate": 2.3767590183983923e-05, + "loss": 0.14496612548828125, + "step": 4816 + }, + { + "epoch": 0.32560497498986074, + "grad_norm": 0.6303550601005554, + "learning_rate": 2.376491276816494e-05, + "loss": 0.10261917114257812, + "step": 4817 + }, + { + "epoch": 0.3256725699607949, + "grad_norm": 1.3865896463394165, + "learning_rate": 2.3762234928244973e-05, + "loss": 0.200592041015625, + "step": 4818 + }, + { + "epoch": 0.3257401649317291, + "grad_norm": 1.1507850885391235, + "learning_rate": 2.375955666435358e-05, + "loss": 0.15262603759765625, + "step": 4819 + }, + { + "epoch": 0.32580775990266325, + "grad_norm": 2.1029744148254395, + "learning_rate": 2.3756877976620372e-05, + "loss": 0.2742156982421875, + "step": 4820 + }, + { + "epoch": 0.3258753548735974, + "grad_norm": 1.1347930431365967, + "learning_rate": 2.3754198865174944e-05, + "loss": 0.27130126953125, + "step": 4821 + }, + { + "epoch": 0.3259429498445316, + "grad_norm": 0.47824835777282715, + "learning_rate": 2.375151933014693e-05, + "loss": 0.0901947021484375, + "step": 4822 + }, + { + "epoch": 0.3260105448154657, + "grad_norm": 0.9703144431114197, + "learning_rate": 2.3748839371665987e-05, + "loss": 0.1707763671875, + "step": 4823 + }, + { + "epoch": 0.3260781397863999, + "grad_norm": 1.1358733177185059, + "learning_rate": 2.374615898986179e-05, + "loss": 0.173828125, + "step": 4824 + }, + { + "epoch": 0.32614573475733405, + "grad_norm": 0.5936216711997986, + "learning_rate": 2.3743478184864026e-05, + "loss": 0.100616455078125, + "step": 4825 + }, + { + "epoch": 0.3262133297282682, + "grad_norm": 2.440664291381836, + "learning_rate": 2.374079695680241e-05, + "loss": 0.351165771484375, + "step": 4826 + }, + { + "epoch": 0.3262809246992024, + "grad_norm": 0.5099789500236511, + "learning_rate": 2.3738115305806687e-05, + "loss": 0.04510498046875, + "step": 4827 + }, + { + "epoch": 0.32634851967013656, + "grad_norm": 0.739219605922699, + "learning_rate": 2.3735433232006597e-05, + "loss": 0.13270950317382812, + "step": 4828 + }, + { + "epoch": 0.32641611464107073, + "grad_norm": 0.7157096266746521, + "learning_rate": 2.3732750735531926e-05, + "loss": 0.11773681640625, + "step": 4829 + }, + { + "epoch": 0.32648370961200485, + "grad_norm": 0.7118630409240723, + "learning_rate": 2.3730067816512464e-05, + "loss": 0.1566925048828125, + "step": 4830 + }, + { + "epoch": 0.326551304582939, + "grad_norm": 1.697868824005127, + "learning_rate": 2.3727384475078028e-05, + "loss": 0.16400146484375, + "step": 4831 + }, + { + "epoch": 0.3266188995538732, + "grad_norm": 0.4055582582950592, + "learning_rate": 2.3724700711358456e-05, + "loss": 0.07520294189453125, + "step": 4832 + }, + { + "epoch": 0.32668649452480736, + "grad_norm": 0.6688006520271301, + "learning_rate": 2.372201652548361e-05, + "loss": 0.154541015625, + "step": 4833 + }, + { + "epoch": 0.32675408949574153, + "grad_norm": 1.0837032794952393, + "learning_rate": 2.3719331917583358e-05, + "loss": 0.1973876953125, + "step": 4834 + }, + { + "epoch": 0.3268216844666757, + "grad_norm": 1.140454888343811, + "learning_rate": 2.3716646887787604e-05, + "loss": 0.24365234375, + "step": 4835 + }, + { + "epoch": 0.3268892794376098, + "grad_norm": 0.7747141122817993, + "learning_rate": 2.3713961436226264e-05, + "loss": 0.16668701171875, + "step": 4836 + }, + { + "epoch": 0.326956874408544, + "grad_norm": 0.3087559640407562, + "learning_rate": 2.371127556302928e-05, + "loss": 0.05667877197265625, + "step": 4837 + }, + { + "epoch": 0.32702446937947816, + "grad_norm": 0.4415244162082672, + "learning_rate": 2.3708589268326612e-05, + "loss": 0.08293914794921875, + "step": 4838 + }, + { + "epoch": 0.3270920643504123, + "grad_norm": 0.23198916018009186, + "learning_rate": 2.3705902552248234e-05, + "loss": 0.031219482421875, + "step": 4839 + }, + { + "epoch": 0.3271596593213465, + "grad_norm": 0.27627792954444885, + "learning_rate": 2.3703215414924146e-05, + "loss": 0.048027992248535156, + "step": 4840 + }, + { + "epoch": 0.32722725429228067, + "grad_norm": 1.009171724319458, + "learning_rate": 2.3700527856484376e-05, + "loss": 0.23613739013671875, + "step": 4841 + }, + { + "epoch": 0.32729484926321484, + "grad_norm": 0.2591935694217682, + "learning_rate": 2.3697839877058963e-05, + "loss": 0.042736053466796875, + "step": 4842 + }, + { + "epoch": 0.32736244423414895, + "grad_norm": 0.26066720485687256, + "learning_rate": 2.369515147677796e-05, + "loss": 0.05429840087890625, + "step": 4843 + }, + { + "epoch": 0.3274300392050831, + "grad_norm": 1.1234073638916016, + "learning_rate": 2.3692462655771458e-05, + "loss": 0.22540283203125, + "step": 4844 + }, + { + "epoch": 0.3274976341760173, + "grad_norm": 0.3066336214542389, + "learning_rate": 2.368977341416955e-05, + "loss": 0.06937408447265625, + "step": 4845 + }, + { + "epoch": 0.32756522914695146, + "grad_norm": 0.6310213804244995, + "learning_rate": 2.3687083752102366e-05, + "loss": 0.133819580078125, + "step": 4846 + }, + { + "epoch": 0.32763282411788563, + "grad_norm": 0.6876088976860046, + "learning_rate": 2.3684393669700044e-05, + "loss": 0.143341064453125, + "step": 4847 + }, + { + "epoch": 0.3277004190888198, + "grad_norm": 0.6749841570854187, + "learning_rate": 2.368170316709275e-05, + "loss": 0.11783599853515625, + "step": 4848 + }, + { + "epoch": 0.327768014059754, + "grad_norm": 0.26195675134658813, + "learning_rate": 2.3679012244410667e-05, + "loss": 0.0531463623046875, + "step": 4849 + }, + { + "epoch": 0.3278356090306881, + "grad_norm": 0.44555529952049255, + "learning_rate": 2.3676320901783995e-05, + "loss": 0.0635833740234375, + "step": 4850 + }, + { + "epoch": 0.32790320400162226, + "grad_norm": 0.9579190611839294, + "learning_rate": 2.3673629139342963e-05, + "loss": 0.16433334350585938, + "step": 4851 + }, + { + "epoch": 0.32797079897255643, + "grad_norm": 0.9815099835395813, + "learning_rate": 2.367093695721781e-05, + "loss": 0.12882232666015625, + "step": 4852 + }, + { + "epoch": 0.3280383939434906, + "grad_norm": 0.5249646306037903, + "learning_rate": 2.3668244355538805e-05, + "loss": 0.07592010498046875, + "step": 4853 + }, + { + "epoch": 0.3281059889144248, + "grad_norm": 0.21529340744018555, + "learning_rate": 2.366555133443623e-05, + "loss": 0.021266937255859375, + "step": 4854 + }, + { + "epoch": 0.32817358388535894, + "grad_norm": 1.146409511566162, + "learning_rate": 2.366285789404039e-05, + "loss": 0.195404052734375, + "step": 4855 + }, + { + "epoch": 0.3282411788562931, + "grad_norm": 0.2857545018196106, + "learning_rate": 2.3660164034481613e-05, + "loss": 0.032405853271484375, + "step": 4856 + }, + { + "epoch": 0.32830877382722723, + "grad_norm": 0.4560707211494446, + "learning_rate": 2.3657469755890244e-05, + "loss": 0.0811309814453125, + "step": 4857 + }, + { + "epoch": 0.3283763687981614, + "grad_norm": 0.3500584363937378, + "learning_rate": 2.3654775058396644e-05, + "loss": 0.056659698486328125, + "step": 4858 + }, + { + "epoch": 0.32844396376909557, + "grad_norm": 0.4157167971134186, + "learning_rate": 2.365207994213121e-05, + "loss": 0.1130828857421875, + "step": 4859 + }, + { + "epoch": 0.32851155874002974, + "grad_norm": 0.8981859683990479, + "learning_rate": 2.3649384407224334e-05, + "loss": 0.11693572998046875, + "step": 4860 + }, + { + "epoch": 0.3285791537109639, + "grad_norm": 1.8952429294586182, + "learning_rate": 2.3646688453806455e-05, + "loss": 0.18926239013671875, + "step": 4861 + }, + { + "epoch": 0.3286467486818981, + "grad_norm": 1.7940139770507812, + "learning_rate": 2.3643992082008013e-05, + "loss": 0.210784912109375, + "step": 4862 + }, + { + "epoch": 0.32871434365283225, + "grad_norm": 0.3712768852710724, + "learning_rate": 2.364129529195948e-05, + "loss": 0.055938720703125, + "step": 4863 + }, + { + "epoch": 0.32878193862376637, + "grad_norm": 0.6326697468757629, + "learning_rate": 2.3638598083791337e-05, + "loss": 0.143096923828125, + "step": 4864 + }, + { + "epoch": 0.32884953359470054, + "grad_norm": 0.32070571184158325, + "learning_rate": 2.36359004576341e-05, + "loss": 0.07756423950195312, + "step": 4865 + }, + { + "epoch": 0.3289171285656347, + "grad_norm": 0.45877015590667725, + "learning_rate": 2.363320241361829e-05, + "loss": 0.09908294677734375, + "step": 4866 + }, + { + "epoch": 0.3289847235365689, + "grad_norm": 0.392230749130249, + "learning_rate": 2.363050395187446e-05, + "loss": 0.07808303833007812, + "step": 4867 + }, + { + "epoch": 0.32905231850750305, + "grad_norm": 0.31064197421073914, + "learning_rate": 2.3627805072533174e-05, + "loss": 0.0785980224609375, + "step": 4868 + }, + { + "epoch": 0.3291199134784372, + "grad_norm": 0.3270653188228607, + "learning_rate": 2.362510577572503e-05, + "loss": 0.078765869140625, + "step": 4869 + }, + { + "epoch": 0.3291875084493714, + "grad_norm": 1.092382550239563, + "learning_rate": 2.362240606158062e-05, + "loss": 0.15288543701171875, + "step": 4870 + }, + { + "epoch": 0.3292551034203055, + "grad_norm": 0.7786378860473633, + "learning_rate": 2.3619705930230587e-05, + "loss": 0.17132568359375, + "step": 4871 + }, + { + "epoch": 0.3293226983912397, + "grad_norm": 0.7105093598365784, + "learning_rate": 2.3617005381805574e-05, + "loss": 0.193634033203125, + "step": 4872 + }, + { + "epoch": 0.32939029336217385, + "grad_norm": 0.5055171847343445, + "learning_rate": 2.3614304416436257e-05, + "loss": 0.11051177978515625, + "step": 4873 + }, + { + "epoch": 0.329457888333108, + "grad_norm": 0.29343748092651367, + "learning_rate": 2.361160303425332e-05, + "loss": 0.068023681640625, + "step": 4874 + }, + { + "epoch": 0.3295254833040422, + "grad_norm": 1.0251870155334473, + "learning_rate": 2.3608901235387464e-05, + "loss": 0.1475982666015625, + "step": 4875 + }, + { + "epoch": 0.32959307827497636, + "grad_norm": 0.7414377927780151, + "learning_rate": 2.3606199019969437e-05, + "loss": 0.13126373291015625, + "step": 4876 + }, + { + "epoch": 0.32966067324591053, + "grad_norm": 0.47879084944725037, + "learning_rate": 2.3603496388129976e-05, + "loss": 0.117828369140625, + "step": 4877 + }, + { + "epoch": 0.32972826821684464, + "grad_norm": 0.4241073429584503, + "learning_rate": 2.3600793339999855e-05, + "loss": 0.075042724609375, + "step": 4878 + }, + { + "epoch": 0.3297958631877788, + "grad_norm": 0.3620228171348572, + "learning_rate": 2.3598089875709868e-05, + "loss": 0.06388092041015625, + "step": 4879 + }, + { + "epoch": 0.329863458158713, + "grad_norm": 0.6148896217346191, + "learning_rate": 2.3595385995390822e-05, + "loss": 0.12160491943359375, + "step": 4880 + }, + { + "epoch": 0.32993105312964716, + "grad_norm": 0.33647191524505615, + "learning_rate": 2.3592681699173544e-05, + "loss": 0.06903839111328125, + "step": 4881 + }, + { + "epoch": 0.3299986481005813, + "grad_norm": 1.275201439857483, + "learning_rate": 2.358997698718889e-05, + "loss": 0.225616455078125, + "step": 4882 + }, + { + "epoch": 0.3300662430715155, + "grad_norm": 1.3819414377212524, + "learning_rate": 2.3587271859567725e-05, + "loss": 0.229583740234375, + "step": 4883 + }, + { + "epoch": 0.33013383804244967, + "grad_norm": 0.4979487657546997, + "learning_rate": 2.358456631644095e-05, + "loss": 0.0793304443359375, + "step": 4884 + }, + { + "epoch": 0.3302014330133838, + "grad_norm": 0.27687615156173706, + "learning_rate": 2.3581860357939467e-05, + "loss": 0.045871734619140625, + "step": 4885 + }, + { + "epoch": 0.33026902798431795, + "grad_norm": 1.0267467498779297, + "learning_rate": 2.3579153984194206e-05, + "loss": 0.169525146484375, + "step": 4886 + }, + { + "epoch": 0.3303366229552521, + "grad_norm": 1.9451583623886108, + "learning_rate": 2.3576447195336123e-05, + "loss": 0.181976318359375, + "step": 4887 + }, + { + "epoch": 0.3304042179261863, + "grad_norm": 0.39656442403793335, + "learning_rate": 2.3573739991496196e-05, + "loss": 0.08599853515625, + "step": 4888 + }, + { + "epoch": 0.33047181289712046, + "grad_norm": 0.48672211170196533, + "learning_rate": 2.3571032372805403e-05, + "loss": 0.037586212158203125, + "step": 4889 + }, + { + "epoch": 0.33053940786805464, + "grad_norm": 0.7935538291931152, + "learning_rate": 2.3568324339394764e-05, + "loss": 0.15228271484375, + "step": 4890 + }, + { + "epoch": 0.3306070028389888, + "grad_norm": 0.2602421045303345, + "learning_rate": 2.3565615891395303e-05, + "loss": 0.0596466064453125, + "step": 4891 + }, + { + "epoch": 0.3306745978099229, + "grad_norm": 0.803732693195343, + "learning_rate": 2.356290702893808e-05, + "loss": 0.149566650390625, + "step": 4892 + }, + { + "epoch": 0.3307421927808571, + "grad_norm": 0.6853134036064148, + "learning_rate": 2.356019775215416e-05, + "loss": 0.096923828125, + "step": 4893 + }, + { + "epoch": 0.33080978775179126, + "grad_norm": 1.0826460123062134, + "learning_rate": 2.355748806117464e-05, + "loss": 0.195465087890625, + "step": 4894 + }, + { + "epoch": 0.33087738272272543, + "grad_norm": 1.2689803838729858, + "learning_rate": 2.3554777956130625e-05, + "loss": 0.196258544921875, + "step": 4895 + }, + { + "epoch": 0.3309449776936596, + "grad_norm": 0.9052737355232239, + "learning_rate": 2.3552067437153258e-05, + "loss": 0.1655120849609375, + "step": 4896 + }, + { + "epoch": 0.3310125726645938, + "grad_norm": 1.0122129917144775, + "learning_rate": 2.3549356504373678e-05, + "loss": 0.224517822265625, + "step": 4897 + }, + { + "epoch": 0.33108016763552794, + "grad_norm": 0.3142913281917572, + "learning_rate": 2.3546645157923065e-05, + "loss": 0.04395294189453125, + "step": 4898 + }, + { + "epoch": 0.33114776260646206, + "grad_norm": 1.0215280055999756, + "learning_rate": 2.3543933397932603e-05, + "loss": 0.2127685546875, + "step": 4899 + }, + { + "epoch": 0.33121535757739623, + "grad_norm": 0.6684781908988953, + "learning_rate": 2.3541221224533515e-05, + "loss": 0.1414031982421875, + "step": 4900 + }, + { + "epoch": 0.3312829525483304, + "grad_norm": 0.3628224730491638, + "learning_rate": 2.3538508637857026e-05, + "loss": 0.0809326171875, + "step": 4901 + }, + { + "epoch": 0.33135054751926457, + "grad_norm": 0.5009320378303528, + "learning_rate": 2.353579563803439e-05, + "loss": 0.11008453369140625, + "step": 4902 + }, + { + "epoch": 0.33141814249019874, + "grad_norm": 0.7789487838745117, + "learning_rate": 2.3533082225196876e-05, + "loss": 0.147613525390625, + "step": 4903 + }, + { + "epoch": 0.3314857374611329, + "grad_norm": 1.0812022686004639, + "learning_rate": 2.353036839947577e-05, + "loss": 0.12969207763671875, + "step": 4904 + }, + { + "epoch": 0.331553332432067, + "grad_norm": 1.2589023113250732, + "learning_rate": 2.3527654161002402e-05, + "loss": 0.1619110107421875, + "step": 4905 + }, + { + "epoch": 0.3316209274030012, + "grad_norm": 1.322041392326355, + "learning_rate": 2.3524939509908087e-05, + "loss": 0.14938735961914062, + "step": 4906 + }, + { + "epoch": 0.33168852237393537, + "grad_norm": 0.404863566160202, + "learning_rate": 2.3522224446324183e-05, + "loss": 0.0815887451171875, + "step": 4907 + }, + { + "epoch": 0.33175611734486954, + "grad_norm": 0.6871452927589417, + "learning_rate": 2.3519508970382062e-05, + "loss": 0.1560211181640625, + "step": 4908 + }, + { + "epoch": 0.3318237123158037, + "grad_norm": 0.24310919642448425, + "learning_rate": 2.351679308221312e-05, + "loss": 0.036121368408203125, + "step": 4909 + }, + { + "epoch": 0.3318913072867379, + "grad_norm": 0.5173813700675964, + "learning_rate": 2.3514076781948755e-05, + "loss": 0.054351806640625, + "step": 4910 + }, + { + "epoch": 0.33195890225767205, + "grad_norm": 0.4060624837875366, + "learning_rate": 2.351136006972041e-05, + "loss": 0.0740966796875, + "step": 4911 + }, + { + "epoch": 0.33202649722860617, + "grad_norm": 0.9012652039527893, + "learning_rate": 2.350864294565953e-05, + "loss": 0.184600830078125, + "step": 4912 + }, + { + "epoch": 0.33209409219954034, + "grad_norm": 0.33096861839294434, + "learning_rate": 2.3505925409897596e-05, + "loss": 0.027309417724609375, + "step": 4913 + }, + { + "epoch": 0.3321616871704745, + "grad_norm": 0.8176455497741699, + "learning_rate": 2.350320746256609e-05, + "loss": 0.18438720703125, + "step": 4914 + }, + { + "epoch": 0.3322292821414087, + "grad_norm": 0.3119303584098816, + "learning_rate": 2.3500489103796528e-05, + "loss": 0.03635978698730469, + "step": 4915 + }, + { + "epoch": 0.33229687711234285, + "grad_norm": 1.021807074546814, + "learning_rate": 2.3497770333720432e-05, + "loss": 0.15583038330078125, + "step": 4916 + }, + { + "epoch": 0.332364472083277, + "grad_norm": 0.28155285120010376, + "learning_rate": 2.3495051152469367e-05, + "loss": 0.0241851806640625, + "step": 4917 + }, + { + "epoch": 0.3324320670542112, + "grad_norm": 0.9386159777641296, + "learning_rate": 2.3492331560174892e-05, + "loss": 0.21380615234375, + "step": 4918 + }, + { + "epoch": 0.3324996620251453, + "grad_norm": 0.37642723321914673, + "learning_rate": 2.348961155696861e-05, + "loss": 0.058147430419921875, + "step": 4919 + }, + { + "epoch": 0.3325672569960795, + "grad_norm": 0.40729475021362305, + "learning_rate": 2.348689114298212e-05, + "loss": 0.106292724609375, + "step": 4920 + }, + { + "epoch": 0.33263485196701364, + "grad_norm": 0.33546265959739685, + "learning_rate": 2.348417031834706e-05, + "loss": 0.042613983154296875, + "step": 4921 + }, + { + "epoch": 0.3327024469379478, + "grad_norm": 0.28004544973373413, + "learning_rate": 2.348144908319507e-05, + "loss": 0.05757904052734375, + "step": 4922 + }, + { + "epoch": 0.332770041908882, + "grad_norm": 0.25978216528892517, + "learning_rate": 2.3478727437657838e-05, + "loss": 0.07440948486328125, + "step": 4923 + }, + { + "epoch": 0.33283763687981616, + "grad_norm": 0.5335596203804016, + "learning_rate": 2.3476005381867034e-05, + "loss": 0.06207275390625, + "step": 4924 + }, + { + "epoch": 0.3329052318507503, + "grad_norm": 0.3563791513442993, + "learning_rate": 2.3473282915954384e-05, + "loss": 0.068328857421875, + "step": 4925 + }, + { + "epoch": 0.33297282682168444, + "grad_norm": 0.29768165946006775, + "learning_rate": 2.3470560040051613e-05, + "loss": 0.06154632568359375, + "step": 4926 + }, + { + "epoch": 0.3330404217926186, + "grad_norm": 0.3412517309188843, + "learning_rate": 2.3467836754290467e-05, + "loss": 0.0715789794921875, + "step": 4927 + }, + { + "epoch": 0.3331080167635528, + "grad_norm": 0.41023868322372437, + "learning_rate": 2.3465113058802717e-05, + "loss": 0.079925537109375, + "step": 4928 + }, + { + "epoch": 0.33317561173448695, + "grad_norm": 0.5496575236320496, + "learning_rate": 2.3462388953720153e-05, + "loss": 0.11534881591796875, + "step": 4929 + }, + { + "epoch": 0.3332432067054211, + "grad_norm": 0.5461713075637817, + "learning_rate": 2.345966443917459e-05, + "loss": 0.0726165771484375, + "step": 4930 + }, + { + "epoch": 0.3333108016763553, + "grad_norm": 1.603904366493225, + "learning_rate": 2.3456939515297842e-05, + "loss": 0.26629638671875, + "step": 4931 + }, + { + "epoch": 0.33337839664728947, + "grad_norm": 0.35134923458099365, + "learning_rate": 2.3454214182221773e-05, + "loss": 0.050933837890625, + "step": 4932 + }, + { + "epoch": 0.3334459916182236, + "grad_norm": 0.49077308177948, + "learning_rate": 2.3451488440078246e-05, + "loss": 0.085418701171875, + "step": 4933 + }, + { + "epoch": 0.33351358658915775, + "grad_norm": 0.6305787563323975, + "learning_rate": 2.3448762288999145e-05, + "loss": 0.1358184814453125, + "step": 4934 + }, + { + "epoch": 0.3335811815600919, + "grad_norm": 0.24692152440547943, + "learning_rate": 2.3446035729116387e-05, + "loss": 0.047607421875, + "step": 4935 + }, + { + "epoch": 0.3336487765310261, + "grad_norm": 1.010581374168396, + "learning_rate": 2.344330876056189e-05, + "loss": 0.14849090576171875, + "step": 4936 + }, + { + "epoch": 0.33371637150196026, + "grad_norm": 0.7251539826393127, + "learning_rate": 2.3440581383467613e-05, + "loss": 0.1929931640625, + "step": 4937 + }, + { + "epoch": 0.33378396647289443, + "grad_norm": 0.19118742644786835, + "learning_rate": 2.3437853597965514e-05, + "loss": 0.04174041748046875, + "step": 4938 + }, + { + "epoch": 0.3338515614438286, + "grad_norm": 0.3480531573295593, + "learning_rate": 2.3435125404187585e-05, + "loss": 0.04646110534667969, + "step": 4939 + }, + { + "epoch": 0.3339191564147627, + "grad_norm": 0.790345311164856, + "learning_rate": 2.3432396802265828e-05, + "loss": 0.1737060546875, + "step": 4940 + }, + { + "epoch": 0.3339867513856969, + "grad_norm": 0.6688593626022339, + "learning_rate": 2.3429667792332278e-05, + "loss": 0.12569427490234375, + "step": 4941 + }, + { + "epoch": 0.33405434635663106, + "grad_norm": 0.9529544711112976, + "learning_rate": 2.342693837451897e-05, + "loss": 0.19646453857421875, + "step": 4942 + }, + { + "epoch": 0.33412194132756523, + "grad_norm": 0.6678045392036438, + "learning_rate": 2.342420854895798e-05, + "loss": 0.13525390625, + "step": 4943 + }, + { + "epoch": 0.3341895362984994, + "grad_norm": 0.3949820101261139, + "learning_rate": 2.3421478315781393e-05, + "loss": 0.12165069580078125, + "step": 4944 + }, + { + "epoch": 0.33425713126943357, + "grad_norm": 0.3605119287967682, + "learning_rate": 2.341874767512131e-05, + "loss": 0.05130767822265625, + "step": 4945 + }, + { + "epoch": 0.33432472624036774, + "grad_norm": 1.2704956531524658, + "learning_rate": 2.3416016627109863e-05, + "loss": 0.268890380859375, + "step": 4946 + }, + { + "epoch": 0.33439232121130186, + "grad_norm": 0.3984356224536896, + "learning_rate": 2.3413285171879188e-05, + "loss": 0.035877227783203125, + "step": 4947 + }, + { + "epoch": 0.33445991618223603, + "grad_norm": 0.4081139862537384, + "learning_rate": 2.3410553309561456e-05, + "loss": 0.0544586181640625, + "step": 4948 + }, + { + "epoch": 0.3345275111531702, + "grad_norm": 0.19202633202075958, + "learning_rate": 2.340782104028885e-05, + "loss": 0.03519439697265625, + "step": 4949 + }, + { + "epoch": 0.33459510612410437, + "grad_norm": 0.7195864915847778, + "learning_rate": 2.3405088364193577e-05, + "loss": 0.1355743408203125, + "step": 4950 + }, + { + "epoch": 0.33466270109503854, + "grad_norm": 0.5237389206886292, + "learning_rate": 2.3402355281407854e-05, + "loss": 0.0869903564453125, + "step": 4951 + }, + { + "epoch": 0.3347302960659727, + "grad_norm": 0.24409285187721252, + "learning_rate": 2.339962179206393e-05, + "loss": 0.04839324951171875, + "step": 4952 + }, + { + "epoch": 0.3347978910369069, + "grad_norm": 0.5717309713363647, + "learning_rate": 2.339688789629407e-05, + "loss": 0.10230255126953125, + "step": 4953 + }, + { + "epoch": 0.334865486007841, + "grad_norm": 0.2013513445854187, + "learning_rate": 2.339415359423055e-05, + "loss": 0.0318450927734375, + "step": 4954 + }, + { + "epoch": 0.33493308097877517, + "grad_norm": 0.2452668398618698, + "learning_rate": 2.3391418886005682e-05, + "loss": 0.03702545166015625, + "step": 4955 + }, + { + "epoch": 0.33500067594970934, + "grad_norm": 0.19646769762039185, + "learning_rate": 2.3388683771751775e-05, + "loss": 0.046952247619628906, + "step": 4956 + }, + { + "epoch": 0.3350682709206435, + "grad_norm": 0.44606539607048035, + "learning_rate": 2.3385948251601183e-05, + "loss": 0.07069015502929688, + "step": 4957 + }, + { + "epoch": 0.3351358658915777, + "grad_norm": 0.7168605923652649, + "learning_rate": 2.338321232568626e-05, + "loss": 0.118682861328125, + "step": 4958 + }, + { + "epoch": 0.33520346086251185, + "grad_norm": 0.9799919128417969, + "learning_rate": 2.3380475994139394e-05, + "loss": 0.1482086181640625, + "step": 4959 + }, + { + "epoch": 0.335271055833446, + "grad_norm": 0.6845338344573975, + "learning_rate": 2.3377739257092983e-05, + "loss": 0.143707275390625, + "step": 4960 + }, + { + "epoch": 0.33533865080438013, + "grad_norm": 0.9557220935821533, + "learning_rate": 2.3375002114679446e-05, + "loss": 0.18946075439453125, + "step": 4961 + }, + { + "epoch": 0.3354062457753143, + "grad_norm": 0.626977801322937, + "learning_rate": 2.3372264567031222e-05, + "loss": 0.0924530029296875, + "step": 4962 + }, + { + "epoch": 0.3354738407462485, + "grad_norm": 0.6835946440696716, + "learning_rate": 2.3369526614280767e-05, + "loss": 0.12465667724609375, + "step": 4963 + }, + { + "epoch": 0.33554143571718265, + "grad_norm": 0.6447674632072449, + "learning_rate": 2.336678825656057e-05, + "loss": 0.09180450439453125, + "step": 4964 + }, + { + "epoch": 0.3356090306881168, + "grad_norm": 0.9254534244537354, + "learning_rate": 2.3364049494003123e-05, + "loss": 0.15356826782226562, + "step": 4965 + }, + { + "epoch": 0.335676625659051, + "grad_norm": 0.7729271054267883, + "learning_rate": 2.3361310326740944e-05, + "loss": 0.161773681640625, + "step": 4966 + }, + { + "epoch": 0.3357442206299851, + "grad_norm": 0.2562079131603241, + "learning_rate": 2.3358570754906572e-05, + "loss": 0.042266845703125, + "step": 4967 + }, + { + "epoch": 0.3358118156009193, + "grad_norm": 2.004323720932007, + "learning_rate": 2.335583077863257e-05, + "loss": 0.2940673828125, + "step": 4968 + }, + { + "epoch": 0.33587941057185344, + "grad_norm": 0.7494246959686279, + "learning_rate": 2.3353090398051507e-05, + "loss": 0.14289093017578125, + "step": 4969 + }, + { + "epoch": 0.3359470055427876, + "grad_norm": 0.621467113494873, + "learning_rate": 2.3350349613295987e-05, + "loss": 0.13816070556640625, + "step": 4970 + }, + { + "epoch": 0.3360146005137218, + "grad_norm": 0.8408346772193909, + "learning_rate": 2.334760842449862e-05, + "loss": 0.201324462890625, + "step": 4971 + }, + { + "epoch": 0.33608219548465595, + "grad_norm": 1.1072676181793213, + "learning_rate": 2.3344866831792047e-05, + "loss": 0.199920654296875, + "step": 4972 + }, + { + "epoch": 0.3361497904555901, + "grad_norm": 0.5314730405807495, + "learning_rate": 2.3342124835308917e-05, + "loss": 0.10003662109375, + "step": 4973 + }, + { + "epoch": 0.33621738542652424, + "grad_norm": 0.5652271509170532, + "learning_rate": 2.333938243518191e-05, + "loss": 0.097381591796875, + "step": 4974 + }, + { + "epoch": 0.3362849803974584, + "grad_norm": 0.9216791987419128, + "learning_rate": 2.333663963154372e-05, + "loss": 0.20001220703125, + "step": 4975 + }, + { + "epoch": 0.3363525753683926, + "grad_norm": 0.5153782963752747, + "learning_rate": 2.3333896424527058e-05, + "loss": 0.1035003662109375, + "step": 4976 + }, + { + "epoch": 0.33642017033932675, + "grad_norm": 0.3674708604812622, + "learning_rate": 2.3331152814264658e-05, + "loss": 0.07047271728515625, + "step": 4977 + }, + { + "epoch": 0.3364877653102609, + "grad_norm": 0.4645637273788452, + "learning_rate": 2.3328408800889273e-05, + "loss": 0.09731292724609375, + "step": 4978 + }, + { + "epoch": 0.3365553602811951, + "grad_norm": 1.1196480989456177, + "learning_rate": 2.3325664384533677e-05, + "loss": 0.199737548828125, + "step": 4979 + }, + { + "epoch": 0.33662295525212926, + "grad_norm": 0.8675222396850586, + "learning_rate": 2.3322919565330665e-05, + "loss": 0.2147064208984375, + "step": 4980 + }, + { + "epoch": 0.3366905502230634, + "grad_norm": 0.17616909742355347, + "learning_rate": 2.3320174343413042e-05, + "loss": 0.03968048095703125, + "step": 4981 + }, + { + "epoch": 0.33675814519399755, + "grad_norm": 0.825867235660553, + "learning_rate": 2.3317428718913642e-05, + "loss": 0.14333343505859375, + "step": 4982 + }, + { + "epoch": 0.3368257401649317, + "grad_norm": 0.6076140999794006, + "learning_rate": 2.3314682691965314e-05, + "loss": 0.11542510986328125, + "step": 4983 + }, + { + "epoch": 0.3368933351358659, + "grad_norm": 1.0105934143066406, + "learning_rate": 2.3311936262700934e-05, + "loss": 0.238677978515625, + "step": 4984 + }, + { + "epoch": 0.33696093010680006, + "grad_norm": 0.28725820779800415, + "learning_rate": 2.3309189431253378e-05, + "loss": 0.05141639709472656, + "step": 4985 + }, + { + "epoch": 0.33702852507773423, + "grad_norm": 0.3551715314388275, + "learning_rate": 2.3306442197755572e-05, + "loss": 0.0706329345703125, + "step": 4986 + }, + { + "epoch": 0.3370961200486684, + "grad_norm": 0.4608752727508545, + "learning_rate": 2.330369456234043e-05, + "loss": 0.0958709716796875, + "step": 4987 + }, + { + "epoch": 0.3371637150196025, + "grad_norm": 1.1730996370315552, + "learning_rate": 2.3300946525140903e-05, + "loss": 0.16890716552734375, + "step": 4988 + }, + { + "epoch": 0.3372313099905367, + "grad_norm": 1.3787626028060913, + "learning_rate": 2.3298198086289965e-05, + "loss": 0.225494384765625, + "step": 4989 + }, + { + "epoch": 0.33729890496147086, + "grad_norm": 0.40034422278404236, + "learning_rate": 2.329544924592059e-05, + "loss": 0.048122406005859375, + "step": 4990 + }, + { + "epoch": 0.33736649993240503, + "grad_norm": 1.2060729265213013, + "learning_rate": 2.3292700004165798e-05, + "loss": 0.16787147521972656, + "step": 4991 + }, + { + "epoch": 0.3374340949033392, + "grad_norm": 1.2145390510559082, + "learning_rate": 2.3289950361158605e-05, + "loss": 0.1513671875, + "step": 4992 + }, + { + "epoch": 0.33750168987427337, + "grad_norm": 0.7728398442268372, + "learning_rate": 2.328720031703206e-05, + "loss": 0.20172119140625, + "step": 4993 + }, + { + "epoch": 0.33756928484520754, + "grad_norm": 0.7950100302696228, + "learning_rate": 2.3284449871919227e-05, + "loss": 0.207061767578125, + "step": 4994 + }, + { + "epoch": 0.33763687981614166, + "grad_norm": 0.81927889585495, + "learning_rate": 2.328169902595319e-05, + "loss": 0.208343505859375, + "step": 4995 + }, + { + "epoch": 0.3377044747870758, + "grad_norm": 0.24669238924980164, + "learning_rate": 2.327894777926705e-05, + "loss": 0.029449462890625, + "step": 4996 + }, + { + "epoch": 0.33777206975801, + "grad_norm": 0.6071335077285767, + "learning_rate": 2.3276196131993932e-05, + "loss": 0.12703704833984375, + "step": 4997 + }, + { + "epoch": 0.33783966472894417, + "grad_norm": 0.5531734228134155, + "learning_rate": 2.327344408426697e-05, + "loss": 0.08718681335449219, + "step": 4998 + }, + { + "epoch": 0.33790725969987834, + "grad_norm": 1.276686429977417, + "learning_rate": 2.3270691636219338e-05, + "loss": 0.154510498046875, + "step": 4999 + }, + { + "epoch": 0.3379748546708125, + "grad_norm": 0.48846644163131714, + "learning_rate": 2.3267938787984206e-05, + "loss": 0.09781646728515625, + "step": 5000 + }, + { + "epoch": 0.3380424496417467, + "grad_norm": 0.6455529928207397, + "learning_rate": 2.326518553969478e-05, + "loss": 0.1360931396484375, + "step": 5001 + }, + { + "epoch": 0.3381100446126808, + "grad_norm": 0.8296560645103455, + "learning_rate": 2.326243189148427e-05, + "loss": 0.1463470458984375, + "step": 5002 + }, + { + "epoch": 0.33817763958361496, + "grad_norm": 0.7390207052230835, + "learning_rate": 2.3259677843485933e-05, + "loss": 0.12595367431640625, + "step": 5003 + }, + { + "epoch": 0.33824523455454913, + "grad_norm": 0.5233412981033325, + "learning_rate": 2.3256923395833008e-05, + "loss": 0.1207275390625, + "step": 5004 + }, + { + "epoch": 0.3383128295254833, + "grad_norm": 0.6018333435058594, + "learning_rate": 2.325416854865878e-05, + "loss": 0.1220703125, + "step": 5005 + }, + { + "epoch": 0.3383804244964175, + "grad_norm": 0.7416953444480896, + "learning_rate": 2.3251413302096545e-05, + "loss": 0.1519012451171875, + "step": 5006 + }, + { + "epoch": 0.33844801946735165, + "grad_norm": 0.7641796469688416, + "learning_rate": 2.3248657656279622e-05, + "loss": 0.1449432373046875, + "step": 5007 + }, + { + "epoch": 0.3385156144382858, + "grad_norm": 0.6745203733444214, + "learning_rate": 2.3245901611341344e-05, + "loss": 0.1585235595703125, + "step": 5008 + }, + { + "epoch": 0.33858320940921993, + "grad_norm": 0.9320836663246155, + "learning_rate": 2.3243145167415063e-05, + "loss": 0.1535491943359375, + "step": 5009 + }, + { + "epoch": 0.3386508043801541, + "grad_norm": 0.5428636074066162, + "learning_rate": 2.3240388324634155e-05, + "loss": 0.1254425048828125, + "step": 5010 + }, + { + "epoch": 0.3387183993510883, + "grad_norm": 0.5272694826126099, + "learning_rate": 2.323763108313202e-05, + "loss": 0.1257781982421875, + "step": 5011 + }, + { + "epoch": 0.33878599432202244, + "grad_norm": 0.3043110966682434, + "learning_rate": 2.3234873443042056e-05, + "loss": 0.05170440673828125, + "step": 5012 + }, + { + "epoch": 0.3388535892929566, + "grad_norm": 0.35014402866363525, + "learning_rate": 2.3232115404497706e-05, + "loss": 0.08295822143554688, + "step": 5013 + }, + { + "epoch": 0.3389211842638908, + "grad_norm": 0.8471705913543701, + "learning_rate": 2.3229356967632412e-05, + "loss": 0.1943206787109375, + "step": 5014 + }, + { + "epoch": 0.33898877923482496, + "grad_norm": 0.3261091709136963, + "learning_rate": 2.322659813257966e-05, + "loss": 0.0770111083984375, + "step": 5015 + }, + { + "epoch": 0.33905637420575907, + "grad_norm": 0.9462964534759521, + "learning_rate": 2.3223838899472923e-05, + "loss": 0.146697998046875, + "step": 5016 + }, + { + "epoch": 0.33912396917669324, + "grad_norm": 0.3244129419326782, + "learning_rate": 2.322107926844572e-05, + "loss": 0.04767799377441406, + "step": 5017 + }, + { + "epoch": 0.3391915641476274, + "grad_norm": 1.2250696420669556, + "learning_rate": 2.321831923963157e-05, + "loss": 0.17949676513671875, + "step": 5018 + }, + { + "epoch": 0.3392591591185616, + "grad_norm": 0.6337512731552124, + "learning_rate": 2.3215558813164037e-05, + "loss": 0.14166259765625, + "step": 5019 + }, + { + "epoch": 0.33932675408949575, + "grad_norm": 0.20830278098583221, + "learning_rate": 2.3212797989176667e-05, + "loss": 0.0369110107421875, + "step": 5020 + }, + { + "epoch": 0.3393943490604299, + "grad_norm": 0.41818514466285706, + "learning_rate": 2.321003676780306e-05, + "loss": 0.05747222900390625, + "step": 5021 + }, + { + "epoch": 0.3394619440313641, + "grad_norm": 0.6978012323379517, + "learning_rate": 2.3207275149176812e-05, + "loss": 0.10960769653320312, + "step": 5022 + }, + { + "epoch": 0.3395295390022982, + "grad_norm": 0.7852663397789001, + "learning_rate": 2.3204513133431555e-05, + "loss": 0.1455535888671875, + "step": 5023 + }, + { + "epoch": 0.3395971339732324, + "grad_norm": 0.20493634045124054, + "learning_rate": 2.3201750720700926e-05, + "loss": 0.03769683837890625, + "step": 5024 + }, + { + "epoch": 0.33966472894416655, + "grad_norm": 0.42198991775512695, + "learning_rate": 2.3198987911118593e-05, + "loss": 0.1071014404296875, + "step": 5025 + }, + { + "epoch": 0.3397323239151007, + "grad_norm": 0.7337316870689392, + "learning_rate": 2.3196224704818242e-05, + "loss": 0.1423797607421875, + "step": 5026 + }, + { + "epoch": 0.3397999188860349, + "grad_norm": 1.0731972455978394, + "learning_rate": 2.319346110193356e-05, + "loss": 0.1772918701171875, + "step": 5027 + }, + { + "epoch": 0.33986751385696906, + "grad_norm": 0.5328977108001709, + "learning_rate": 2.319069710259828e-05, + "loss": 0.071441650390625, + "step": 5028 + }, + { + "epoch": 0.3399351088279032, + "grad_norm": 0.5386819839477539, + "learning_rate": 2.3187932706946135e-05, + "loss": 0.0867767333984375, + "step": 5029 + }, + { + "epoch": 0.34000270379883735, + "grad_norm": 0.46277299523353577, + "learning_rate": 2.3185167915110882e-05, + "loss": 0.05872344970703125, + "step": 5030 + }, + { + "epoch": 0.3400702987697715, + "grad_norm": 0.9294494390487671, + "learning_rate": 2.3182402727226306e-05, + "loss": 0.1623687744140625, + "step": 5031 + }, + { + "epoch": 0.3401378937407057, + "grad_norm": 0.36109641194343567, + "learning_rate": 2.3179637143426204e-05, + "loss": 0.0584716796875, + "step": 5032 + }, + { + "epoch": 0.34020548871163986, + "grad_norm": 0.8785193562507629, + "learning_rate": 2.317687116384438e-05, + "loss": 0.168060302734375, + "step": 5033 + }, + { + "epoch": 0.34027308368257403, + "grad_norm": 0.7476078271865845, + "learning_rate": 2.3174104788614684e-05, + "loss": 0.169097900390625, + "step": 5034 + }, + { + "epoch": 0.3403406786535082, + "grad_norm": 0.52705979347229, + "learning_rate": 2.317133801787096e-05, + "loss": 0.0719146728515625, + "step": 5035 + }, + { + "epoch": 0.3404082736244423, + "grad_norm": 0.7737045884132385, + "learning_rate": 2.316857085174709e-05, + "loss": 0.150787353515625, + "step": 5036 + }, + { + "epoch": 0.3404758685953765, + "grad_norm": 2.262300491333008, + "learning_rate": 2.316580329037696e-05, + "loss": 0.1641082763671875, + "step": 5037 + }, + { + "epoch": 0.34054346356631066, + "grad_norm": 1.6774170398712158, + "learning_rate": 2.3163035333894483e-05, + "loss": 0.1819000244140625, + "step": 5038 + }, + { + "epoch": 0.3406110585372448, + "grad_norm": 0.34184229373931885, + "learning_rate": 2.3160266982433593e-05, + "loss": 0.0623779296875, + "step": 5039 + }, + { + "epoch": 0.340678653508179, + "grad_norm": 1.1144746541976929, + "learning_rate": 2.3157498236128236e-05, + "loss": 0.1944732666015625, + "step": 5040 + }, + { + "epoch": 0.34074624847911317, + "grad_norm": 1.5523576736450195, + "learning_rate": 2.3154729095112384e-05, + "loss": 0.213043212890625, + "step": 5041 + }, + { + "epoch": 0.34081384345004734, + "grad_norm": 0.4508211612701416, + "learning_rate": 2.315195955952003e-05, + "loss": 0.10353851318359375, + "step": 5042 + }, + { + "epoch": 0.34088143842098145, + "grad_norm": 0.27235788106918335, + "learning_rate": 2.314918962948517e-05, + "loss": 0.06195831298828125, + "step": 5043 + }, + { + "epoch": 0.3409490333919156, + "grad_norm": 0.33176034688949585, + "learning_rate": 2.3146419305141838e-05, + "loss": 0.079559326171875, + "step": 5044 + }, + { + "epoch": 0.3410166283628498, + "grad_norm": 1.2579765319824219, + "learning_rate": 2.3143648586624077e-05, + "loss": 0.236419677734375, + "step": 5045 + }, + { + "epoch": 0.34108422333378396, + "grad_norm": 0.9625036716461182, + "learning_rate": 2.3140877474065954e-05, + "loss": 0.1563568115234375, + "step": 5046 + }, + { + "epoch": 0.34115181830471814, + "grad_norm": 0.4026227593421936, + "learning_rate": 2.313810596760155e-05, + "loss": 0.09275054931640625, + "step": 5047 + }, + { + "epoch": 0.3412194132756523, + "grad_norm": 0.25687775015830994, + "learning_rate": 2.3135334067364972e-05, + "loss": 0.04541015625, + "step": 5048 + }, + { + "epoch": 0.3412870082465865, + "grad_norm": 1.1304931640625, + "learning_rate": 2.3132561773490338e-05, + "loss": 0.1602783203125, + "step": 5049 + }, + { + "epoch": 0.3413546032175206, + "grad_norm": 0.22683890163898468, + "learning_rate": 2.3129789086111785e-05, + "loss": 0.0511322021484375, + "step": 5050 + }, + { + "epoch": 0.34142219818845476, + "grad_norm": 0.6259018778800964, + "learning_rate": 2.3127016005363478e-05, + "loss": 0.151336669921875, + "step": 5051 + }, + { + "epoch": 0.34148979315938893, + "grad_norm": 0.41437286138534546, + "learning_rate": 2.31242425313796e-05, + "loss": 0.0943756103515625, + "step": 5052 + }, + { + "epoch": 0.3415573881303231, + "grad_norm": 0.79329514503479, + "learning_rate": 2.3121468664294345e-05, + "loss": 0.131805419921875, + "step": 5053 + }, + { + "epoch": 0.3416249831012573, + "grad_norm": 0.6595682501792908, + "learning_rate": 2.3118694404241927e-05, + "loss": 0.139923095703125, + "step": 5054 + }, + { + "epoch": 0.34169257807219144, + "grad_norm": 0.516678512096405, + "learning_rate": 2.3115919751356583e-05, + "loss": 0.10137939453125, + "step": 5055 + }, + { + "epoch": 0.3417601730431256, + "grad_norm": 0.20442546904087067, + "learning_rate": 2.311314470577257e-05, + "loss": 0.03237152099609375, + "step": 5056 + }, + { + "epoch": 0.34182776801405973, + "grad_norm": 0.5801090002059937, + "learning_rate": 2.311036926762416e-05, + "loss": 0.1243438720703125, + "step": 5057 + }, + { + "epoch": 0.3418953629849939, + "grad_norm": 0.5161773562431335, + "learning_rate": 2.3107593437045643e-05, + "loss": 0.1056365966796875, + "step": 5058 + }, + { + "epoch": 0.34196295795592807, + "grad_norm": 0.913569450378418, + "learning_rate": 2.310481721417134e-05, + "loss": 0.18292999267578125, + "step": 5059 + }, + { + "epoch": 0.34203055292686224, + "grad_norm": 0.5976070165634155, + "learning_rate": 2.3102040599135578e-05, + "loss": 0.095184326171875, + "step": 5060 + }, + { + "epoch": 0.3420981478977964, + "grad_norm": 0.9383872747421265, + "learning_rate": 2.3099263592072703e-05, + "loss": 0.164825439453125, + "step": 5061 + }, + { + "epoch": 0.3421657428687306, + "grad_norm": 0.4364163875579834, + "learning_rate": 2.3096486193117088e-05, + "loss": 0.08113861083984375, + "step": 5062 + }, + { + "epoch": 0.34223333783966475, + "grad_norm": 0.8136324882507324, + "learning_rate": 2.309370840240312e-05, + "loss": 0.12853240966796875, + "step": 5063 + }, + { + "epoch": 0.34230093281059887, + "grad_norm": 1.9347597360610962, + "learning_rate": 2.3090930220065202e-05, + "loss": 0.18809127807617188, + "step": 5064 + }, + { + "epoch": 0.34236852778153304, + "grad_norm": 0.983054518699646, + "learning_rate": 2.3088151646237766e-05, + "loss": 0.15554428100585938, + "step": 5065 + }, + { + "epoch": 0.3424361227524672, + "grad_norm": 0.4733498990535736, + "learning_rate": 2.3085372681055257e-05, + "loss": 0.07958984375, + "step": 5066 + }, + { + "epoch": 0.3425037177234014, + "grad_norm": 0.19683519005775452, + "learning_rate": 2.3082593324652132e-05, + "loss": 0.03601837158203125, + "step": 5067 + }, + { + "epoch": 0.34257131269433555, + "grad_norm": 2.264115810394287, + "learning_rate": 2.307981357716287e-05, + "loss": 0.2735595703125, + "step": 5068 + }, + { + "epoch": 0.3426389076652697, + "grad_norm": 0.16871751844882965, + "learning_rate": 2.307703343872199e-05, + "loss": 0.031024932861328125, + "step": 5069 + }, + { + "epoch": 0.3427065026362039, + "grad_norm": 0.39344218373298645, + "learning_rate": 2.3074252909463994e-05, + "loss": 0.0949249267578125, + "step": 5070 + }, + { + "epoch": 0.342774097607138, + "grad_norm": 1.808022141456604, + "learning_rate": 2.3071471989523437e-05, + "loss": 0.16621780395507812, + "step": 5071 + }, + { + "epoch": 0.3428416925780722, + "grad_norm": 1.264532446861267, + "learning_rate": 2.3068690679034863e-05, + "loss": 0.1465930938720703, + "step": 5072 + }, + { + "epoch": 0.34290928754900635, + "grad_norm": 0.43471601605415344, + "learning_rate": 2.3065908978132855e-05, + "loss": 0.0737762451171875, + "step": 5073 + }, + { + "epoch": 0.3429768825199405, + "grad_norm": 0.44124236702919006, + "learning_rate": 2.306312688695201e-05, + "loss": 0.1049041748046875, + "step": 5074 + }, + { + "epoch": 0.3430444774908747, + "grad_norm": 0.28004875779151917, + "learning_rate": 2.3060344405626942e-05, + "loss": 0.06746673583984375, + "step": 5075 + }, + { + "epoch": 0.34311207246180886, + "grad_norm": 0.9154136180877686, + "learning_rate": 2.3057561534292287e-05, + "loss": 0.11517333984375, + "step": 5076 + }, + { + "epoch": 0.34317966743274303, + "grad_norm": 0.26547718048095703, + "learning_rate": 2.3054778273082695e-05, + "loss": 0.0458831787109375, + "step": 5077 + }, + { + "epoch": 0.34324726240367714, + "grad_norm": 0.2523127794265747, + "learning_rate": 2.3051994622132838e-05, + "loss": 0.04665374755859375, + "step": 5078 + }, + { + "epoch": 0.3433148573746113, + "grad_norm": 0.2548302710056305, + "learning_rate": 2.304921058157741e-05, + "loss": 0.065277099609375, + "step": 5079 + }, + { + "epoch": 0.3433824523455455, + "grad_norm": 0.3432652950286865, + "learning_rate": 2.304642615155111e-05, + "loss": 0.09145355224609375, + "step": 5080 + }, + { + "epoch": 0.34345004731647966, + "grad_norm": 0.4430122673511505, + "learning_rate": 2.3043641332188673e-05, + "loss": 0.10205078125, + "step": 5081 + }, + { + "epoch": 0.3435176422874138, + "grad_norm": 0.4850219786167145, + "learning_rate": 2.3040856123624846e-05, + "loss": 0.1344757080078125, + "step": 5082 + }, + { + "epoch": 0.343585237258348, + "grad_norm": 0.27478837966918945, + "learning_rate": 2.30380705259944e-05, + "loss": 0.06398773193359375, + "step": 5083 + }, + { + "epoch": 0.34365283222928217, + "grad_norm": 1.1389594078063965, + "learning_rate": 2.3035284539432108e-05, + "loss": 0.237640380859375, + "step": 5084 + }, + { + "epoch": 0.3437204272002163, + "grad_norm": 0.3875144124031067, + "learning_rate": 2.3032498164072777e-05, + "loss": 0.072113037109375, + "step": 5085 + }, + { + "epoch": 0.34378802217115045, + "grad_norm": 0.7151209115982056, + "learning_rate": 2.3029711400051236e-05, + "loss": 0.166229248046875, + "step": 5086 + }, + { + "epoch": 0.3438556171420846, + "grad_norm": 0.5361102223396301, + "learning_rate": 2.3026924247502323e-05, + "loss": 0.09641265869140625, + "step": 5087 + }, + { + "epoch": 0.3439232121130188, + "grad_norm": 1.1720054149627686, + "learning_rate": 2.3024136706560892e-05, + "loss": 0.1431427001953125, + "step": 5088 + }, + { + "epoch": 0.34399080708395297, + "grad_norm": 0.27447670698165894, + "learning_rate": 2.3021348777361828e-05, + "loss": 0.0696258544921875, + "step": 5089 + }, + { + "epoch": 0.34405840205488714, + "grad_norm": 0.3825188875198364, + "learning_rate": 2.301856046004002e-05, + "loss": 0.03757476806640625, + "step": 5090 + }, + { + "epoch": 0.3441259970258213, + "grad_norm": 0.5756194591522217, + "learning_rate": 2.30157717547304e-05, + "loss": 0.152374267578125, + "step": 5091 + }, + { + "epoch": 0.3441935919967554, + "grad_norm": 0.444818913936615, + "learning_rate": 2.3012982661567886e-05, + "loss": 0.0792083740234375, + "step": 5092 + }, + { + "epoch": 0.3442611869676896, + "grad_norm": 0.9700287580490112, + "learning_rate": 2.3010193180687438e-05, + "loss": 0.158660888671875, + "step": 5093 + }, + { + "epoch": 0.34432878193862376, + "grad_norm": 0.4169450104236603, + "learning_rate": 2.3007403312224033e-05, + "loss": 0.04458808898925781, + "step": 5094 + }, + { + "epoch": 0.34439637690955793, + "grad_norm": 0.4037080705165863, + "learning_rate": 2.3004613056312653e-05, + "loss": 0.07425689697265625, + "step": 5095 + }, + { + "epoch": 0.3444639718804921, + "grad_norm": 0.2234620302915573, + "learning_rate": 2.300182241308832e-05, + "loss": 0.03664398193359375, + "step": 5096 + }, + { + "epoch": 0.3445315668514263, + "grad_norm": 0.9043830633163452, + "learning_rate": 2.2999031382686053e-05, + "loss": 0.219757080078125, + "step": 5097 + }, + { + "epoch": 0.3445991618223604, + "grad_norm": 0.264163613319397, + "learning_rate": 2.2996239965240896e-05, + "loss": 0.05779266357421875, + "step": 5098 + }, + { + "epoch": 0.34466675679329456, + "grad_norm": 0.7967213988304138, + "learning_rate": 2.299344816088793e-05, + "loss": 0.14858245849609375, + "step": 5099 + }, + { + "epoch": 0.34473435176422873, + "grad_norm": 0.9008874297142029, + "learning_rate": 2.2990655969762228e-05, + "loss": 0.1132659912109375, + "step": 5100 + }, + { + "epoch": 0.3448019467351629, + "grad_norm": 1.1301697492599487, + "learning_rate": 2.29878633919989e-05, + "loss": 0.2689208984375, + "step": 5101 + }, + { + "epoch": 0.34486954170609707, + "grad_norm": 0.4876960515975952, + "learning_rate": 2.2985070427733064e-05, + "loss": 0.0971832275390625, + "step": 5102 + }, + { + "epoch": 0.34493713667703124, + "grad_norm": 0.5258723497390747, + "learning_rate": 2.298227707709986e-05, + "loss": 0.1107025146484375, + "step": 5103 + }, + { + "epoch": 0.3450047316479654, + "grad_norm": 0.349315345287323, + "learning_rate": 2.2979483340234452e-05, + "loss": 0.0769195556640625, + "step": 5104 + }, + { + "epoch": 0.3450723266188995, + "grad_norm": 0.7810129523277283, + "learning_rate": 2.297668921727201e-05, + "loss": 0.123870849609375, + "step": 5105 + }, + { + "epoch": 0.3451399215898337, + "grad_norm": 0.24875269830226898, + "learning_rate": 2.2973894708347742e-05, + "loss": 0.038543701171875, + "step": 5106 + }, + { + "epoch": 0.34520751656076787, + "grad_norm": 0.9125117063522339, + "learning_rate": 2.2971099813596855e-05, + "loss": 0.1747589111328125, + "step": 5107 + }, + { + "epoch": 0.34527511153170204, + "grad_norm": 0.22637400031089783, + "learning_rate": 2.2968304533154595e-05, + "loss": 0.05886077880859375, + "step": 5108 + }, + { + "epoch": 0.3453427065026362, + "grad_norm": 0.45579880475997925, + "learning_rate": 2.29655088671562e-05, + "loss": 0.081207275390625, + "step": 5109 + }, + { + "epoch": 0.3454103014735704, + "grad_norm": 0.5120406150817871, + "learning_rate": 2.2962712815736953e-05, + "loss": 0.11212158203125, + "step": 5110 + }, + { + "epoch": 0.34547789644450455, + "grad_norm": 0.3282274901866913, + "learning_rate": 2.295991637903214e-05, + "loss": 0.068634033203125, + "step": 5111 + }, + { + "epoch": 0.34554549141543867, + "grad_norm": 0.6958644986152649, + "learning_rate": 2.2957119557177073e-05, + "loss": 0.12320709228515625, + "step": 5112 + }, + { + "epoch": 0.34561308638637284, + "grad_norm": 0.6884657144546509, + "learning_rate": 2.2954322350307075e-05, + "loss": 0.1190643310546875, + "step": 5113 + }, + { + "epoch": 0.345680681357307, + "grad_norm": 0.22750124335289001, + "learning_rate": 2.295152475855749e-05, + "loss": 0.0390167236328125, + "step": 5114 + }, + { + "epoch": 0.3457482763282412, + "grad_norm": 0.559291422367096, + "learning_rate": 2.2948726782063688e-05, + "loss": 0.141754150390625, + "step": 5115 + }, + { + "epoch": 0.34581587129917535, + "grad_norm": 0.9548380374908447, + "learning_rate": 2.2945928420961056e-05, + "loss": 0.1771240234375, + "step": 5116 + }, + { + "epoch": 0.3458834662701095, + "grad_norm": 0.5586147904396057, + "learning_rate": 2.294312967538499e-05, + "loss": 0.08246612548828125, + "step": 5117 + }, + { + "epoch": 0.3459510612410437, + "grad_norm": 0.8750038743019104, + "learning_rate": 2.2940330545470912e-05, + "loss": 0.1754150390625, + "step": 5118 + }, + { + "epoch": 0.3460186562119778, + "grad_norm": 0.20490966737270355, + "learning_rate": 2.2937531031354263e-05, + "loss": 0.052455902099609375, + "step": 5119 + }, + { + "epoch": 0.346086251182912, + "grad_norm": 0.5633097887039185, + "learning_rate": 2.29347311331705e-05, + "loss": 0.117218017578125, + "step": 5120 + }, + { + "epoch": 0.34615384615384615, + "grad_norm": 1.1394895315170288, + "learning_rate": 2.2931930851055095e-05, + "loss": 0.2080078125, + "step": 5121 + }, + { + "epoch": 0.3462214411247803, + "grad_norm": 0.8100658059120178, + "learning_rate": 2.2929130185143553e-05, + "loss": 0.1205291748046875, + "step": 5122 + }, + { + "epoch": 0.3462890360957145, + "grad_norm": 0.823171854019165, + "learning_rate": 2.2926329135571378e-05, + "loss": 0.187347412109375, + "step": 5123 + }, + { + "epoch": 0.34635663106664866, + "grad_norm": 0.31584519147872925, + "learning_rate": 2.292352770247411e-05, + "loss": 0.056243896484375, + "step": 5124 + }, + { + "epoch": 0.3464242260375828, + "grad_norm": 0.5891517400741577, + "learning_rate": 2.2920725885987292e-05, + "loss": 0.09421730041503906, + "step": 5125 + }, + { + "epoch": 0.34649182100851694, + "grad_norm": 0.5872898101806641, + "learning_rate": 2.29179236862465e-05, + "loss": 0.168792724609375, + "step": 5126 + }, + { + "epoch": 0.3465594159794511, + "grad_norm": 0.7157860398292542, + "learning_rate": 2.2915121103387314e-05, + "loss": 0.11879539489746094, + "step": 5127 + }, + { + "epoch": 0.3466270109503853, + "grad_norm": 0.9829388856887817, + "learning_rate": 2.291231813754535e-05, + "loss": 0.1231832504272461, + "step": 5128 + }, + { + "epoch": 0.34669460592131945, + "grad_norm": 0.7727842330932617, + "learning_rate": 2.2909514788856224e-05, + "loss": 0.15655517578125, + "step": 5129 + }, + { + "epoch": 0.3467622008922536, + "grad_norm": 1.2858585119247437, + "learning_rate": 2.2906711057455592e-05, + "loss": 0.24786376953125, + "step": 5130 + }, + { + "epoch": 0.3468297958631878, + "grad_norm": 0.8777881264686584, + "learning_rate": 2.29039069434791e-05, + "loss": 0.16546630859375, + "step": 5131 + }, + { + "epoch": 0.34689739083412197, + "grad_norm": 1.3703763484954834, + "learning_rate": 2.2901102447062437e-05, + "loss": 0.18597412109375, + "step": 5132 + }, + { + "epoch": 0.3469649858050561, + "grad_norm": 0.2640206813812256, + "learning_rate": 2.2898297568341305e-05, + "loss": 0.041927337646484375, + "step": 5133 + }, + { + "epoch": 0.34703258077599025, + "grad_norm": 0.8603159785270691, + "learning_rate": 2.2895492307451414e-05, + "loss": 0.10805511474609375, + "step": 5134 + }, + { + "epoch": 0.3471001757469244, + "grad_norm": 1.7415472269058228, + "learning_rate": 2.2892686664528508e-05, + "loss": 0.2186279296875, + "step": 5135 + }, + { + "epoch": 0.3471677707178586, + "grad_norm": 0.524154543876648, + "learning_rate": 2.2889880639708333e-05, + "loss": 0.1337127685546875, + "step": 5136 + }, + { + "epoch": 0.34723536568879276, + "grad_norm": 0.7038068771362305, + "learning_rate": 2.288707423312667e-05, + "loss": 0.11778640747070312, + "step": 5137 + }, + { + "epoch": 0.34730296065972693, + "grad_norm": 1.504224181175232, + "learning_rate": 2.2884267444919302e-05, + "loss": 0.16301727294921875, + "step": 5138 + }, + { + "epoch": 0.3473705556306611, + "grad_norm": 1.0710042715072632, + "learning_rate": 2.2881460275222043e-05, + "loss": 0.1578369140625, + "step": 5139 + }, + { + "epoch": 0.3474381506015952, + "grad_norm": 0.5677447319030762, + "learning_rate": 2.2878652724170727e-05, + "loss": 0.115142822265625, + "step": 5140 + }, + { + "epoch": 0.3475057455725294, + "grad_norm": 0.37480565905570984, + "learning_rate": 2.287584479190119e-05, + "loss": 0.12508392333984375, + "step": 5141 + }, + { + "epoch": 0.34757334054346356, + "grad_norm": 0.16147273778915405, + "learning_rate": 2.2873036478549307e-05, + "loss": 0.028387069702148438, + "step": 5142 + }, + { + "epoch": 0.34764093551439773, + "grad_norm": 0.27313604950904846, + "learning_rate": 2.2870227784250954e-05, + "loss": 0.033405303955078125, + "step": 5143 + }, + { + "epoch": 0.3477085304853319, + "grad_norm": 0.7198265194892883, + "learning_rate": 2.2867418709142035e-05, + "loss": 0.1333160400390625, + "step": 5144 + }, + { + "epoch": 0.34777612545626607, + "grad_norm": 0.7667776942253113, + "learning_rate": 2.2864609253358477e-05, + "loss": 0.0795745849609375, + "step": 5145 + }, + { + "epoch": 0.34784372042720024, + "grad_norm": 0.3651522099971771, + "learning_rate": 2.286179941703621e-05, + "loss": 0.07806015014648438, + "step": 5146 + }, + { + "epoch": 0.34791131539813436, + "grad_norm": 0.4671342372894287, + "learning_rate": 2.28589892003112e-05, + "loss": 0.08538818359375, + "step": 5147 + }, + { + "epoch": 0.34797891036906853, + "grad_norm": 0.7459509968757629, + "learning_rate": 2.2856178603319413e-05, + "loss": 0.12557220458984375, + "step": 5148 + }, + { + "epoch": 0.3480465053400027, + "grad_norm": 0.3149915635585785, + "learning_rate": 2.2853367626196845e-05, + "loss": 0.0638885498046875, + "step": 5149 + }, + { + "epoch": 0.34811410031093687, + "grad_norm": 0.13037046790122986, + "learning_rate": 2.2850556269079522e-05, + "loss": 0.025165557861328125, + "step": 5150 + }, + { + "epoch": 0.34818169528187104, + "grad_norm": 0.3371737003326416, + "learning_rate": 2.284774453210346e-05, + "loss": 0.05567741394042969, + "step": 5151 + }, + { + "epoch": 0.3482492902528052, + "grad_norm": 0.22156211733818054, + "learning_rate": 2.2844932415404706e-05, + "loss": 0.0325775146484375, + "step": 5152 + }, + { + "epoch": 0.3483168852237394, + "grad_norm": 0.37321504950523376, + "learning_rate": 2.2842119919119342e-05, + "loss": 0.0918121337890625, + "step": 5153 + }, + { + "epoch": 0.3483844801946735, + "grad_norm": 0.3647233545780182, + "learning_rate": 2.2839307043383444e-05, + "loss": 0.04502677917480469, + "step": 5154 + }, + { + "epoch": 0.34845207516560767, + "grad_norm": 1.1474578380584717, + "learning_rate": 2.2836493788333118e-05, + "loss": 0.2783203125, + "step": 5155 + }, + { + "epoch": 0.34851967013654184, + "grad_norm": 1.691568374633789, + "learning_rate": 2.283368015410449e-05, + "loss": 0.208343505859375, + "step": 5156 + }, + { + "epoch": 0.348587265107476, + "grad_norm": 0.28726354241371155, + "learning_rate": 2.28308661408337e-05, + "loss": 0.04822540283203125, + "step": 5157 + }, + { + "epoch": 0.3486548600784102, + "grad_norm": 0.6642056703567505, + "learning_rate": 2.2828051748656903e-05, + "loss": 0.14292144775390625, + "step": 5158 + }, + { + "epoch": 0.34872245504934435, + "grad_norm": 0.35834190249443054, + "learning_rate": 2.2825236977710288e-05, + "loss": 0.07979583740234375, + "step": 5159 + }, + { + "epoch": 0.34879005002027846, + "grad_norm": 0.3187779188156128, + "learning_rate": 2.282242182813004e-05, + "loss": 0.05092620849609375, + "step": 5160 + }, + { + "epoch": 0.34885764499121263, + "grad_norm": 0.6285379528999329, + "learning_rate": 2.2819606300052372e-05, + "loss": 0.1536712646484375, + "step": 5161 + }, + { + "epoch": 0.3489252399621468, + "grad_norm": 0.8207674622535706, + "learning_rate": 2.2816790393613525e-05, + "loss": 0.214447021484375, + "step": 5162 + }, + { + "epoch": 0.348992834933081, + "grad_norm": 0.2946471571922302, + "learning_rate": 2.281397410894975e-05, + "loss": 0.05596160888671875, + "step": 5163 + }, + { + "epoch": 0.34906042990401515, + "grad_norm": 0.8560787439346313, + "learning_rate": 2.2811157446197307e-05, + "loss": 0.1219482421875, + "step": 5164 + }, + { + "epoch": 0.3491280248749493, + "grad_norm": 0.1762470006942749, + "learning_rate": 2.2808340405492486e-05, + "loss": 0.033050537109375, + "step": 5165 + }, + { + "epoch": 0.3491956198458835, + "grad_norm": 0.22496536374092102, + "learning_rate": 2.2805522986971607e-05, + "loss": 0.040599822998046875, + "step": 5166 + }, + { + "epoch": 0.3492632148168176, + "grad_norm": 1.0120372772216797, + "learning_rate": 2.2802705190770978e-05, + "loss": 0.220733642578125, + "step": 5167 + }, + { + "epoch": 0.3493308097877518, + "grad_norm": 0.8631182909011841, + "learning_rate": 2.279988701702695e-05, + "loss": 0.223419189453125, + "step": 5168 + }, + { + "epoch": 0.34939840475868594, + "grad_norm": 0.5568606853485107, + "learning_rate": 2.2797068465875878e-05, + "loss": 0.107818603515625, + "step": 5169 + }, + { + "epoch": 0.3494659997296201, + "grad_norm": 0.6499901413917542, + "learning_rate": 2.279424953745415e-05, + "loss": 0.12702178955078125, + "step": 5170 + }, + { + "epoch": 0.3495335947005543, + "grad_norm": 0.6114886403083801, + "learning_rate": 2.279143023189815e-05, + "loss": 0.1288909912109375, + "step": 5171 + }, + { + "epoch": 0.34960118967148845, + "grad_norm": 0.41702529788017273, + "learning_rate": 2.2788610549344307e-05, + "loss": 0.07379150390625, + "step": 5172 + }, + { + "epoch": 0.3496687846424226, + "grad_norm": 0.5841506719589233, + "learning_rate": 2.2785790489929045e-05, + "loss": 0.1048736572265625, + "step": 5173 + }, + { + "epoch": 0.34973637961335674, + "grad_norm": 0.3641476631164551, + "learning_rate": 2.278297005378882e-05, + "loss": 0.0702667236328125, + "step": 5174 + }, + { + "epoch": 0.3498039745842909, + "grad_norm": 1.004530429840088, + "learning_rate": 2.2780149241060108e-05, + "loss": 0.2053985595703125, + "step": 5175 + }, + { + "epoch": 0.3498715695552251, + "grad_norm": 0.3095797002315521, + "learning_rate": 2.2777328051879385e-05, + "loss": 0.06215667724609375, + "step": 5176 + }, + { + "epoch": 0.34993916452615925, + "grad_norm": 1.526321530342102, + "learning_rate": 2.2774506486383164e-05, + "loss": 0.158660888671875, + "step": 5177 + }, + { + "epoch": 0.3500067594970934, + "grad_norm": 0.2876974046230316, + "learning_rate": 2.277168454470797e-05, + "loss": 0.04926300048828125, + "step": 5178 + }, + { + "epoch": 0.3500743544680276, + "grad_norm": 0.7451337575912476, + "learning_rate": 2.276886222699035e-05, + "loss": 0.1360931396484375, + "step": 5179 + }, + { + "epoch": 0.35014194943896176, + "grad_norm": 1.11702561378479, + "learning_rate": 2.2766039533366857e-05, + "loss": 0.22869873046875, + "step": 5180 + }, + { + "epoch": 0.3502095444098959, + "grad_norm": 0.482400119304657, + "learning_rate": 2.2763216463974077e-05, + "loss": 0.0961456298828125, + "step": 5181 + }, + { + "epoch": 0.35027713938083005, + "grad_norm": 0.8465874791145325, + "learning_rate": 2.2760393018948605e-05, + "loss": 0.136932373046875, + "step": 5182 + }, + { + "epoch": 0.3503447343517642, + "grad_norm": 0.647552490234375, + "learning_rate": 2.2757569198427065e-05, + "loss": 0.10272979736328125, + "step": 5183 + }, + { + "epoch": 0.3504123293226984, + "grad_norm": 0.3879786431789398, + "learning_rate": 2.2754745002546073e-05, + "loss": 0.10144996643066406, + "step": 5184 + }, + { + "epoch": 0.35047992429363256, + "grad_norm": 0.5607888102531433, + "learning_rate": 2.2751920431442293e-05, + "loss": 0.113128662109375, + "step": 5185 + }, + { + "epoch": 0.35054751926456673, + "grad_norm": 0.31835034489631653, + "learning_rate": 2.2749095485252397e-05, + "loss": 0.05108642578125, + "step": 5186 + }, + { + "epoch": 0.3506151142355009, + "grad_norm": 0.7349342703819275, + "learning_rate": 2.274627016411307e-05, + "loss": 0.1453704833984375, + "step": 5187 + }, + { + "epoch": 0.350682709206435, + "grad_norm": 0.7064315676689148, + "learning_rate": 2.2743444468161016e-05, + "loss": 0.08731460571289062, + "step": 5188 + }, + { + "epoch": 0.3507503041773692, + "grad_norm": 0.8004326820373535, + "learning_rate": 2.274061839753297e-05, + "loss": 0.1512298583984375, + "step": 5189 + }, + { + "epoch": 0.35081789914830336, + "grad_norm": 0.8657540678977966, + "learning_rate": 2.273779195236566e-05, + "loss": 0.15130615234375, + "step": 5190 + }, + { + "epoch": 0.35088549411923753, + "grad_norm": 0.46286511421203613, + "learning_rate": 2.2734965132795856e-05, + "loss": 0.07637786865234375, + "step": 5191 + }, + { + "epoch": 0.3509530890901717, + "grad_norm": 0.26261308789253235, + "learning_rate": 2.2732137938960335e-05, + "loss": 0.05402183532714844, + "step": 5192 + }, + { + "epoch": 0.35102068406110587, + "grad_norm": 1.4824273586273193, + "learning_rate": 2.27293103709959e-05, + "loss": 0.11045455932617188, + "step": 5193 + }, + { + "epoch": 0.35108827903204004, + "grad_norm": 1.7210636138916016, + "learning_rate": 2.272648242903936e-05, + "loss": 0.233367919921875, + "step": 5194 + }, + { + "epoch": 0.35115587400297416, + "grad_norm": 0.7941687703132629, + "learning_rate": 2.2723654113227544e-05, + "loss": 0.09553909301757812, + "step": 5195 + }, + { + "epoch": 0.3512234689739083, + "grad_norm": 0.4931658208370209, + "learning_rate": 2.2720825423697315e-05, + "loss": 0.1330718994140625, + "step": 5196 + }, + { + "epoch": 0.3512910639448425, + "grad_norm": 0.4159606695175171, + "learning_rate": 2.2717996360585535e-05, + "loss": 0.0707855224609375, + "step": 5197 + }, + { + "epoch": 0.35135865891577667, + "grad_norm": 0.16841943562030792, + "learning_rate": 2.271516692402909e-05, + "loss": 0.024713516235351562, + "step": 5198 + }, + { + "epoch": 0.35142625388671084, + "grad_norm": 0.45401060581207275, + "learning_rate": 2.271233711416489e-05, + "loss": 0.11804771423339844, + "step": 5199 + }, + { + "epoch": 0.351493848857645, + "grad_norm": 0.6673094034194946, + "learning_rate": 2.2709506931129858e-05, + "loss": 0.0828704833984375, + "step": 5200 + }, + { + "epoch": 0.3515614438285792, + "grad_norm": 0.7688940763473511, + "learning_rate": 2.2706676375060936e-05, + "loss": 0.1830596923828125, + "step": 5201 + }, + { + "epoch": 0.3516290387995133, + "grad_norm": 0.2876627445220947, + "learning_rate": 2.2703845446095083e-05, + "loss": 0.04732513427734375, + "step": 5202 + }, + { + "epoch": 0.35169663377044746, + "grad_norm": 0.4754655957221985, + "learning_rate": 2.270101414436928e-05, + "loss": 0.10268402099609375, + "step": 5203 + }, + { + "epoch": 0.35176422874138163, + "grad_norm": 0.8085731863975525, + "learning_rate": 2.2698182470020527e-05, + "loss": 0.13507080078125, + "step": 5204 + }, + { + "epoch": 0.3518318237123158, + "grad_norm": 1.6338013410568237, + "learning_rate": 2.2695350423185822e-05, + "loss": 0.202178955078125, + "step": 5205 + }, + { + "epoch": 0.35189941868325, + "grad_norm": 0.5796405673027039, + "learning_rate": 2.269251800400221e-05, + "loss": 0.1623077392578125, + "step": 5206 + }, + { + "epoch": 0.35196701365418415, + "grad_norm": 0.34379851818084717, + "learning_rate": 2.2689685212606733e-05, + "loss": 0.05988311767578125, + "step": 5207 + }, + { + "epoch": 0.3520346086251183, + "grad_norm": 1.3955872058868408, + "learning_rate": 2.2686852049136473e-05, + "loss": 0.21221923828125, + "step": 5208 + }, + { + "epoch": 0.35210220359605243, + "grad_norm": 0.5674917101860046, + "learning_rate": 2.26840185137285e-05, + "loss": 0.098724365234375, + "step": 5209 + }, + { + "epoch": 0.3521697985669866, + "grad_norm": 0.5005835294723511, + "learning_rate": 2.268118460651993e-05, + "loss": 0.0938568115234375, + "step": 5210 + }, + { + "epoch": 0.3522373935379208, + "grad_norm": 0.5155926942825317, + "learning_rate": 2.267835032764787e-05, + "loss": 0.09033203125, + "step": 5211 + }, + { + "epoch": 0.35230498850885494, + "grad_norm": 0.3125703036785126, + "learning_rate": 2.267551567724948e-05, + "loss": 0.06461334228515625, + "step": 5212 + }, + { + "epoch": 0.3523725834797891, + "grad_norm": 0.9880190491676331, + "learning_rate": 2.2672680655461904e-05, + "loss": 0.1568756103515625, + "step": 5213 + }, + { + "epoch": 0.3524401784507233, + "grad_norm": 1.035520315170288, + "learning_rate": 2.2669845262422324e-05, + "loss": 0.188995361328125, + "step": 5214 + }, + { + "epoch": 0.35250777342165746, + "grad_norm": 0.3524642884731293, + "learning_rate": 2.2667009498267927e-05, + "loss": 0.06232643127441406, + "step": 5215 + }, + { + "epoch": 0.35257536839259157, + "grad_norm": 0.46666058897972107, + "learning_rate": 2.2664173363135935e-05, + "loss": 0.08179283142089844, + "step": 5216 + }, + { + "epoch": 0.35264296336352574, + "grad_norm": 0.33863529562950134, + "learning_rate": 2.2661336857163574e-05, + "loss": 0.04805755615234375, + "step": 5217 + }, + { + "epoch": 0.3527105583344599, + "grad_norm": 0.6103351712226868, + "learning_rate": 2.2658499980488088e-05, + "loss": 0.105926513671875, + "step": 5218 + }, + { + "epoch": 0.3527781533053941, + "grad_norm": 1.7543398141860962, + "learning_rate": 2.2655662733246746e-05, + "loss": 0.277496337890625, + "step": 5219 + }, + { + "epoch": 0.35284574827632825, + "grad_norm": 0.6139993071556091, + "learning_rate": 2.265282511557683e-05, + "loss": 0.12135696411132812, + "step": 5220 + }, + { + "epoch": 0.3529133432472624, + "grad_norm": 0.8696273565292358, + "learning_rate": 2.264998712761564e-05, + "loss": 0.15911865234375, + "step": 5221 + }, + { + "epoch": 0.3529809382181966, + "grad_norm": 0.39495569467544556, + "learning_rate": 2.26471487695005e-05, + "loss": 0.099578857421875, + "step": 5222 + }, + { + "epoch": 0.3530485331891307, + "grad_norm": 0.4907020330429077, + "learning_rate": 2.2644310041368744e-05, + "loss": 0.12160491943359375, + "step": 5223 + }, + { + "epoch": 0.3531161281600649, + "grad_norm": 0.7259909510612488, + "learning_rate": 2.264147094335773e-05, + "loss": 0.1559600830078125, + "step": 5224 + }, + { + "epoch": 0.35318372313099905, + "grad_norm": 0.6860354542732239, + "learning_rate": 2.263863147560483e-05, + "loss": 0.16121673583984375, + "step": 5225 + }, + { + "epoch": 0.3532513181019332, + "grad_norm": 0.8957836031913757, + "learning_rate": 2.2635791638247437e-05, + "loss": 0.142059326171875, + "step": 5226 + }, + { + "epoch": 0.3533189130728674, + "grad_norm": 0.509551465511322, + "learning_rate": 2.2632951431422958e-05, + "loss": 0.07866668701171875, + "step": 5227 + }, + { + "epoch": 0.35338650804380156, + "grad_norm": 0.1616186499595642, + "learning_rate": 2.2630110855268817e-05, + "loss": 0.032474517822265625, + "step": 5228 + }, + { + "epoch": 0.3534541030147357, + "grad_norm": 0.39650997519493103, + "learning_rate": 2.2627269909922465e-05, + "loss": 0.06574249267578125, + "step": 5229 + }, + { + "epoch": 0.35352169798566985, + "grad_norm": 0.8755261301994324, + "learning_rate": 2.262442859552136e-05, + "loss": 0.1348876953125, + "step": 5230 + }, + { + "epoch": 0.353589292956604, + "grad_norm": 0.8684614896774292, + "learning_rate": 2.262158691220298e-05, + "loss": 0.192138671875, + "step": 5231 + }, + { + "epoch": 0.3536568879275382, + "grad_norm": 0.25463080406188965, + "learning_rate": 2.261874486010483e-05, + "loss": 0.034282684326171875, + "step": 5232 + }, + { + "epoch": 0.35372448289847236, + "grad_norm": 0.4231691360473633, + "learning_rate": 2.2615902439364423e-05, + "loss": 0.0849609375, + "step": 5233 + }, + { + "epoch": 0.35379207786940653, + "grad_norm": 0.3176732361316681, + "learning_rate": 2.2613059650119295e-05, + "loss": 0.0685272216796875, + "step": 5234 + }, + { + "epoch": 0.3538596728403407, + "grad_norm": 1.5018000602722168, + "learning_rate": 2.2610216492506995e-05, + "loss": 0.17969131469726562, + "step": 5235 + }, + { + "epoch": 0.3539272678112748, + "grad_norm": 0.5453265905380249, + "learning_rate": 2.2607372966665092e-05, + "loss": 0.0896148681640625, + "step": 5236 + }, + { + "epoch": 0.353994862782209, + "grad_norm": 0.8180318474769592, + "learning_rate": 2.2604529072731175e-05, + "loss": 0.10329437255859375, + "step": 5237 + }, + { + "epoch": 0.35406245775314316, + "grad_norm": 1.3085222244262695, + "learning_rate": 2.260168481084285e-05, + "loss": 0.198150634765625, + "step": 5238 + }, + { + "epoch": 0.3541300527240773, + "grad_norm": 0.49729442596435547, + "learning_rate": 2.2598840181137746e-05, + "loss": 0.0862274169921875, + "step": 5239 + }, + { + "epoch": 0.3541976476950115, + "grad_norm": 0.5217711329460144, + "learning_rate": 2.259599518375349e-05, + "loss": 0.1226959228515625, + "step": 5240 + }, + { + "epoch": 0.35426524266594567, + "grad_norm": 0.33206993341445923, + "learning_rate": 2.2593149818827746e-05, + "loss": 0.049449920654296875, + "step": 5241 + }, + { + "epoch": 0.35433283763687984, + "grad_norm": 1.2168011665344238, + "learning_rate": 2.25903040864982e-05, + "loss": 0.1717815399169922, + "step": 5242 + }, + { + "epoch": 0.35440043260781395, + "grad_norm": 1.203943133354187, + "learning_rate": 2.2587457986902535e-05, + "loss": 0.201324462890625, + "step": 5243 + }, + { + "epoch": 0.3544680275787481, + "grad_norm": 0.5996583104133606, + "learning_rate": 2.2584611520178465e-05, + "loss": 0.13669586181640625, + "step": 5244 + }, + { + "epoch": 0.3545356225496823, + "grad_norm": 0.43276411294937134, + "learning_rate": 2.2581764686463723e-05, + "loss": 0.10068130493164062, + "step": 5245 + }, + { + "epoch": 0.35460321752061646, + "grad_norm": 1.2234296798706055, + "learning_rate": 2.2578917485896057e-05, + "loss": 0.17333984375, + "step": 5246 + }, + { + "epoch": 0.35467081249155064, + "grad_norm": 0.90549635887146, + "learning_rate": 2.2576069918613226e-05, + "loss": 0.14992904663085938, + "step": 5247 + }, + { + "epoch": 0.3547384074624848, + "grad_norm": 0.9133997559547424, + "learning_rate": 2.257322198475302e-05, + "loss": 0.1316070556640625, + "step": 5248 + }, + { + "epoch": 0.354806002433419, + "grad_norm": 0.26408711075782776, + "learning_rate": 2.2570373684453233e-05, + "loss": 0.06977081298828125, + "step": 5249 + }, + { + "epoch": 0.3548735974043531, + "grad_norm": 0.5609915852546692, + "learning_rate": 2.2567525017851692e-05, + "loss": 0.1184539794921875, + "step": 5250 + }, + { + "epoch": 0.35494119237528726, + "grad_norm": 0.7795847654342651, + "learning_rate": 2.256467598508623e-05, + "loss": 0.1236724853515625, + "step": 5251 + }, + { + "epoch": 0.35500878734622143, + "grad_norm": 0.33645737171173096, + "learning_rate": 2.2561826586294697e-05, + "loss": 0.031406402587890625, + "step": 5252 + }, + { + "epoch": 0.3550763823171556, + "grad_norm": 0.7793053984642029, + "learning_rate": 2.255897682161497e-05, + "loss": 0.1337432861328125, + "step": 5253 + }, + { + "epoch": 0.3551439772880898, + "grad_norm": 0.2545807361602783, + "learning_rate": 2.255612669118493e-05, + "loss": 0.0672607421875, + "step": 5254 + }, + { + "epoch": 0.35521157225902394, + "grad_norm": 0.6515668034553528, + "learning_rate": 2.2553276195142494e-05, + "loss": 0.122314453125, + "step": 5255 + }, + { + "epoch": 0.3552791672299581, + "grad_norm": 0.48860087990760803, + "learning_rate": 2.2550425333625582e-05, + "loss": 0.1172332763671875, + "step": 5256 + }, + { + "epoch": 0.35534676220089223, + "grad_norm": 0.2491050660610199, + "learning_rate": 2.2547574106772135e-05, + "loss": 0.0631866455078125, + "step": 5257 + }, + { + "epoch": 0.3554143571718264, + "grad_norm": 0.3029486835002899, + "learning_rate": 2.2544722514720114e-05, + "loss": 0.0852203369140625, + "step": 5258 + }, + { + "epoch": 0.35548195214276057, + "grad_norm": 0.31472909450531006, + "learning_rate": 2.25418705576075e-05, + "loss": 0.0457305908203125, + "step": 5259 + }, + { + "epoch": 0.35554954711369474, + "grad_norm": 0.24621246755123138, + "learning_rate": 2.253901823557229e-05, + "loss": 0.0477142333984375, + "step": 5260 + }, + { + "epoch": 0.3556171420846289, + "grad_norm": 0.7583276033401489, + "learning_rate": 2.2536165548752484e-05, + "loss": 0.12607955932617188, + "step": 5261 + }, + { + "epoch": 0.3556847370555631, + "grad_norm": 0.30185335874557495, + "learning_rate": 2.2533312497286124e-05, + "loss": 0.0639190673828125, + "step": 5262 + }, + { + "epoch": 0.35575233202649725, + "grad_norm": 0.4309576153755188, + "learning_rate": 2.253045908131126e-05, + "loss": 0.08734130859375, + "step": 5263 + }, + { + "epoch": 0.35581992699743137, + "grad_norm": 0.43808841705322266, + "learning_rate": 2.252760530096596e-05, + "loss": 0.06963729858398438, + "step": 5264 + }, + { + "epoch": 0.35588752196836554, + "grad_norm": 0.27662307024002075, + "learning_rate": 2.2524751156388287e-05, + "loss": 0.031158447265625, + "step": 5265 + }, + { + "epoch": 0.3559551169392997, + "grad_norm": 0.8744943141937256, + "learning_rate": 2.252189664771637e-05, + "loss": 0.180084228515625, + "step": 5266 + }, + { + "epoch": 0.3560227119102339, + "grad_norm": 1.6980195045471191, + "learning_rate": 2.2519041775088308e-05, + "loss": 0.229949951171875, + "step": 5267 + }, + { + "epoch": 0.35609030688116805, + "grad_norm": 0.5418623685836792, + "learning_rate": 2.2516186538642246e-05, + "loss": 0.080352783203125, + "step": 5268 + }, + { + "epoch": 0.3561579018521022, + "grad_norm": 0.3738362491130829, + "learning_rate": 2.2513330938516336e-05, + "loss": 0.07287979125976562, + "step": 5269 + }, + { + "epoch": 0.3562254968230364, + "grad_norm": 0.27809596061706543, + "learning_rate": 2.251047497484875e-05, + "loss": 0.03151702880859375, + "step": 5270 + }, + { + "epoch": 0.3562930917939705, + "grad_norm": 0.8959876298904419, + "learning_rate": 2.250761864777768e-05, + "loss": 0.1958770751953125, + "step": 5271 + }, + { + "epoch": 0.3563606867649047, + "grad_norm": 0.7518210411071777, + "learning_rate": 2.250476195744133e-05, + "loss": 0.141632080078125, + "step": 5272 + }, + { + "epoch": 0.35642828173583885, + "grad_norm": 0.5926613807678223, + "learning_rate": 2.2501904903977925e-05, + "loss": 0.1302337646484375, + "step": 5273 + }, + { + "epoch": 0.356495876706773, + "grad_norm": 1.0984281301498413, + "learning_rate": 2.24990474875257e-05, + "loss": 0.16720199584960938, + "step": 5274 + }, + { + "epoch": 0.3565634716777072, + "grad_norm": 0.4458605945110321, + "learning_rate": 2.2496189708222933e-05, + "loss": 0.07003021240234375, + "step": 5275 + }, + { + "epoch": 0.35663106664864136, + "grad_norm": 1.0726289749145508, + "learning_rate": 2.2493331566207883e-05, + "loss": 0.14328861236572266, + "step": 5276 + }, + { + "epoch": 0.35669866161957553, + "grad_norm": 0.3421489894390106, + "learning_rate": 2.2490473061618852e-05, + "loss": 0.06015777587890625, + "step": 5277 + }, + { + "epoch": 0.35676625659050965, + "grad_norm": 0.45114952325820923, + "learning_rate": 2.2487614194594154e-05, + "loss": 0.0760498046875, + "step": 5278 + }, + { + "epoch": 0.3568338515614438, + "grad_norm": 0.8655304908752441, + "learning_rate": 2.2484754965272116e-05, + "loss": 0.14118194580078125, + "step": 5279 + }, + { + "epoch": 0.356901446532378, + "grad_norm": 0.3800870180130005, + "learning_rate": 2.248189537379108e-05, + "loss": 0.07662582397460938, + "step": 5280 + }, + { + "epoch": 0.35696904150331216, + "grad_norm": 0.4799764156341553, + "learning_rate": 2.2479035420289426e-05, + "loss": 0.071319580078125, + "step": 5281 + }, + { + "epoch": 0.3570366364742463, + "grad_norm": 0.745237410068512, + "learning_rate": 2.2476175104905517e-05, + "loss": 0.1299285888671875, + "step": 5282 + }, + { + "epoch": 0.3571042314451805, + "grad_norm": 0.3088464140892029, + "learning_rate": 2.2473314427777766e-05, + "loss": 0.0702056884765625, + "step": 5283 + }, + { + "epoch": 0.35717182641611467, + "grad_norm": 0.1809242218732834, + "learning_rate": 2.247045338904459e-05, + "loss": 0.03075408935546875, + "step": 5284 + }, + { + "epoch": 0.3572394213870488, + "grad_norm": 0.7437956929206848, + "learning_rate": 2.2467591988844416e-05, + "loss": 0.12321281433105469, + "step": 5285 + }, + { + "epoch": 0.35730701635798295, + "grad_norm": 0.22759325802326202, + "learning_rate": 2.24647302273157e-05, + "loss": 0.0348358154296875, + "step": 5286 + }, + { + "epoch": 0.3573746113289171, + "grad_norm": 0.5403704047203064, + "learning_rate": 2.246186810459692e-05, + "loss": 0.118408203125, + "step": 5287 + }, + { + "epoch": 0.3574422062998513, + "grad_norm": 0.8145089745521545, + "learning_rate": 2.2459005620826554e-05, + "loss": 0.162872314453125, + "step": 5288 + }, + { + "epoch": 0.35750980127078547, + "grad_norm": 0.664395809173584, + "learning_rate": 2.2456142776143108e-05, + "loss": 0.0921630859375, + "step": 5289 + }, + { + "epoch": 0.35757739624171964, + "grad_norm": 0.6110082864761353, + "learning_rate": 2.24532795706851e-05, + "loss": 0.134185791015625, + "step": 5290 + }, + { + "epoch": 0.35764499121265375, + "grad_norm": 0.345763236284256, + "learning_rate": 2.2450416004591083e-05, + "loss": 0.04865264892578125, + "step": 5291 + }, + { + "epoch": 0.3577125861835879, + "grad_norm": 0.5794229507446289, + "learning_rate": 2.2447552077999602e-05, + "loss": 0.11667633056640625, + "step": 5292 + }, + { + "epoch": 0.3577801811545221, + "grad_norm": 0.22642913460731506, + "learning_rate": 2.2444687791049236e-05, + "loss": 0.04993438720703125, + "step": 5293 + }, + { + "epoch": 0.35784777612545626, + "grad_norm": 0.5792099237442017, + "learning_rate": 2.2441823143878577e-05, + "loss": 0.08892822265625, + "step": 5294 + }, + { + "epoch": 0.35791537109639043, + "grad_norm": 0.3777136504650116, + "learning_rate": 2.2438958136626233e-05, + "loss": 0.0660247802734375, + "step": 5295 + }, + { + "epoch": 0.3579829660673246, + "grad_norm": 0.5770601034164429, + "learning_rate": 2.2436092769430836e-05, + "loss": 0.11758041381835938, + "step": 5296 + }, + { + "epoch": 0.3580505610382588, + "grad_norm": 0.7859377264976501, + "learning_rate": 2.2433227042431022e-05, + "loss": 0.1012725830078125, + "step": 5297 + }, + { + "epoch": 0.3581181560091929, + "grad_norm": 1.0225441455841064, + "learning_rate": 2.2430360955765457e-05, + "loss": 0.17609405517578125, + "step": 5298 + }, + { + "epoch": 0.35818575098012706, + "grad_norm": 0.5746550559997559, + "learning_rate": 2.2427494509572824e-05, + "loss": 0.09130859375, + "step": 5299 + }, + { + "epoch": 0.35825334595106123, + "grad_norm": 1.6477015018463135, + "learning_rate": 2.2424627703991813e-05, + "loss": 0.194366455078125, + "step": 5300 + }, + { + "epoch": 0.3583209409219954, + "grad_norm": 0.5343949794769287, + "learning_rate": 2.242176053916114e-05, + "loss": 0.11510467529296875, + "step": 5301 + }, + { + "epoch": 0.35838853589292957, + "grad_norm": 0.8391137719154358, + "learning_rate": 2.241889301521954e-05, + "loss": 0.1701507568359375, + "step": 5302 + }, + { + "epoch": 0.35845613086386374, + "grad_norm": 0.4084276258945465, + "learning_rate": 2.2416025132305752e-05, + "loss": 0.0605010986328125, + "step": 5303 + }, + { + "epoch": 0.3585237258347979, + "grad_norm": 0.35891589522361755, + "learning_rate": 2.2413156890558552e-05, + "loss": 0.0710296630859375, + "step": 5304 + }, + { + "epoch": 0.35859132080573203, + "grad_norm": 0.39434710144996643, + "learning_rate": 2.2410288290116718e-05, + "loss": 0.0711669921875, + "step": 5305 + }, + { + "epoch": 0.3586589157766662, + "grad_norm": 0.3071916401386261, + "learning_rate": 2.2407419331119057e-05, + "loss": 0.0699920654296875, + "step": 5306 + }, + { + "epoch": 0.35872651074760037, + "grad_norm": 0.33473142981529236, + "learning_rate": 2.2404550013704375e-05, + "loss": 0.06656265258789062, + "step": 5307 + }, + { + "epoch": 0.35879410571853454, + "grad_norm": 0.3711496889591217, + "learning_rate": 2.240168033801152e-05, + "loss": 0.07651138305664062, + "step": 5308 + }, + { + "epoch": 0.3588617006894687, + "grad_norm": 0.9359216094017029, + "learning_rate": 2.239881030417934e-05, + "loss": 0.22900390625, + "step": 5309 + }, + { + "epoch": 0.3589292956604029, + "grad_norm": 1.3988345861434937, + "learning_rate": 2.2395939912346706e-05, + "loss": 0.195831298828125, + "step": 5310 + }, + { + "epoch": 0.35899689063133705, + "grad_norm": 0.17484991252422333, + "learning_rate": 2.23930691626525e-05, + "loss": 0.038745880126953125, + "step": 5311 + }, + { + "epoch": 0.35906448560227117, + "grad_norm": 0.23320120573043823, + "learning_rate": 2.2390198055235634e-05, + "loss": 0.03751373291015625, + "step": 5312 + }, + { + "epoch": 0.35913208057320534, + "grad_norm": 0.5951047539710999, + "learning_rate": 2.2387326590235027e-05, + "loss": 0.1229400634765625, + "step": 5313 + }, + { + "epoch": 0.3591996755441395, + "grad_norm": 1.0501865148544312, + "learning_rate": 2.2384454767789616e-05, + "loss": 0.26641845703125, + "step": 5314 + }, + { + "epoch": 0.3592672705150737, + "grad_norm": 1.2749139070510864, + "learning_rate": 2.238158258803836e-05, + "loss": 0.2169189453125, + "step": 5315 + }, + { + "epoch": 0.35933486548600785, + "grad_norm": 0.623396098613739, + "learning_rate": 2.2378710051120234e-05, + "loss": 0.0853271484375, + "step": 5316 + }, + { + "epoch": 0.359402460456942, + "grad_norm": 0.7866125106811523, + "learning_rate": 2.2375837157174225e-05, + "loss": 0.20330810546875, + "step": 5317 + }, + { + "epoch": 0.3594700554278762, + "grad_norm": 0.8902850151062012, + "learning_rate": 2.237296390633935e-05, + "loss": 0.11329460144042969, + "step": 5318 + }, + { + "epoch": 0.3595376503988103, + "grad_norm": 0.39621275663375854, + "learning_rate": 2.237009029875463e-05, + "loss": 0.10294342041015625, + "step": 5319 + }, + { + "epoch": 0.3596052453697445, + "grad_norm": 0.3073650896549225, + "learning_rate": 2.2367216334559097e-05, + "loss": 0.06223297119140625, + "step": 5320 + }, + { + "epoch": 0.35967284034067865, + "grad_norm": 0.22787919640541077, + "learning_rate": 2.236434201389183e-05, + "loss": 0.047977447509765625, + "step": 5321 + }, + { + "epoch": 0.3597404353116128, + "grad_norm": 0.5142772793769836, + "learning_rate": 2.2361467336891897e-05, + "loss": 0.06485748291015625, + "step": 5322 + }, + { + "epoch": 0.359808030282547, + "grad_norm": 0.2863757610321045, + "learning_rate": 2.2358592303698392e-05, + "loss": 0.06072998046875, + "step": 5323 + }, + { + "epoch": 0.35987562525348116, + "grad_norm": 0.37998566031455994, + "learning_rate": 2.235571691445043e-05, + "loss": 0.0597076416015625, + "step": 5324 + }, + { + "epoch": 0.35994322022441533, + "grad_norm": 0.5471479296684265, + "learning_rate": 2.235284116928714e-05, + "loss": 0.13014984130859375, + "step": 5325 + }, + { + "epoch": 0.36001081519534944, + "grad_norm": 0.3112901747226715, + "learning_rate": 2.2349965068347667e-05, + "loss": 0.05792236328125, + "step": 5326 + }, + { + "epoch": 0.3600784101662836, + "grad_norm": 0.22011063992977142, + "learning_rate": 2.2347088611771178e-05, + "loss": 0.036289215087890625, + "step": 5327 + }, + { + "epoch": 0.3601460051372178, + "grad_norm": 0.281561017036438, + "learning_rate": 2.2344211799696842e-05, + "loss": 0.06395721435546875, + "step": 5328 + }, + { + "epoch": 0.36021360010815195, + "grad_norm": 0.7164864540100098, + "learning_rate": 2.2341334632263873e-05, + "loss": 0.09415054321289062, + "step": 5329 + }, + { + "epoch": 0.3602811950790861, + "grad_norm": 0.2510460615158081, + "learning_rate": 2.2338457109611476e-05, + "loss": 0.048961639404296875, + "step": 5330 + }, + { + "epoch": 0.3603487900500203, + "grad_norm": 0.22399428486824036, + "learning_rate": 2.233557923187889e-05, + "loss": 0.04290771484375, + "step": 5331 + }, + { + "epoch": 0.36041638502095447, + "grad_norm": 0.9577419757843018, + "learning_rate": 2.2332700999205354e-05, + "loss": 0.1787109375, + "step": 5332 + }, + { + "epoch": 0.3604839799918886, + "grad_norm": 1.079264760017395, + "learning_rate": 2.232982241173015e-05, + "loss": 0.14786529541015625, + "step": 5333 + }, + { + "epoch": 0.36055157496282275, + "grad_norm": 0.8267684578895569, + "learning_rate": 2.232694346959255e-05, + "loss": 0.12778472900390625, + "step": 5334 + }, + { + "epoch": 0.3606191699337569, + "grad_norm": 0.9004457592964172, + "learning_rate": 2.2324064172931866e-05, + "loss": 0.208099365234375, + "step": 5335 + }, + { + "epoch": 0.3606867649046911, + "grad_norm": 1.2998524904251099, + "learning_rate": 2.2321184521887402e-05, + "loss": 0.1828765869140625, + "step": 5336 + }, + { + "epoch": 0.36075435987562526, + "grad_norm": 0.7950384616851807, + "learning_rate": 2.2318304516598503e-05, + "loss": 0.181365966796875, + "step": 5337 + }, + { + "epoch": 0.36082195484655943, + "grad_norm": 0.5055760145187378, + "learning_rate": 2.231542415720452e-05, + "loss": 0.1236572265625, + "step": 5338 + }, + { + "epoch": 0.3608895498174936, + "grad_norm": 0.8509258031845093, + "learning_rate": 2.2312543443844822e-05, + "loss": 0.12841796875, + "step": 5339 + }, + { + "epoch": 0.3609571447884277, + "grad_norm": 0.3871576488018036, + "learning_rate": 2.2309662376658794e-05, + "loss": 0.056732177734375, + "step": 5340 + }, + { + "epoch": 0.3610247397593619, + "grad_norm": 0.6202695965766907, + "learning_rate": 2.230678095578584e-05, + "loss": 0.12548828125, + "step": 5341 + }, + { + "epoch": 0.36109233473029606, + "grad_norm": 0.9103687405586243, + "learning_rate": 2.230389918136539e-05, + "loss": 0.139739990234375, + "step": 5342 + }, + { + "epoch": 0.36115992970123023, + "grad_norm": 0.2953839600086212, + "learning_rate": 2.2301017053536872e-05, + "loss": 0.06781387329101562, + "step": 5343 + }, + { + "epoch": 0.3612275246721644, + "grad_norm": 0.5871710181236267, + "learning_rate": 2.2298134572439745e-05, + "loss": 0.117431640625, + "step": 5344 + }, + { + "epoch": 0.3612951196430986, + "grad_norm": 0.73885577917099, + "learning_rate": 2.229525173821348e-05, + "loss": 0.14291000366210938, + "step": 5345 + }, + { + "epoch": 0.36136271461403274, + "grad_norm": 0.7531721591949463, + "learning_rate": 2.2292368550997567e-05, + "loss": 0.13597869873046875, + "step": 5346 + }, + { + "epoch": 0.36143030958496686, + "grad_norm": 1.0738180875778198, + "learning_rate": 2.2289485010931515e-05, + "loss": 0.2085418701171875, + "step": 5347 + }, + { + "epoch": 0.36149790455590103, + "grad_norm": 0.8432738184928894, + "learning_rate": 2.228660111815484e-05, + "loss": 0.214630126953125, + "step": 5348 + }, + { + "epoch": 0.3615654995268352, + "grad_norm": 0.16975131630897522, + "learning_rate": 2.2283716872807088e-05, + "loss": 0.022686004638671875, + "step": 5349 + }, + { + "epoch": 0.36163309449776937, + "grad_norm": 0.5482372641563416, + "learning_rate": 2.228083227502782e-05, + "loss": 0.1059417724609375, + "step": 5350 + }, + { + "epoch": 0.36170068946870354, + "grad_norm": 0.408704936504364, + "learning_rate": 2.2277947324956612e-05, + "loss": 0.09774017333984375, + "step": 5351 + }, + { + "epoch": 0.3617682844396377, + "grad_norm": 0.8328374028205872, + "learning_rate": 2.2275062022733045e-05, + "loss": 0.1531219482421875, + "step": 5352 + }, + { + "epoch": 0.3618358794105719, + "grad_norm": 0.4206582307815552, + "learning_rate": 2.227217636849673e-05, + "loss": 0.1128997802734375, + "step": 5353 + }, + { + "epoch": 0.361903474381506, + "grad_norm": 1.4760019779205322, + "learning_rate": 2.2269290362387308e-05, + "loss": 0.260345458984375, + "step": 5354 + }, + { + "epoch": 0.36197106935244017, + "grad_norm": 0.44105836749076843, + "learning_rate": 2.2266404004544405e-05, + "loss": 0.11444091796875, + "step": 5355 + }, + { + "epoch": 0.36203866432337434, + "grad_norm": 0.3312775790691376, + "learning_rate": 2.226351729510769e-05, + "loss": 0.0670318603515625, + "step": 5356 + }, + { + "epoch": 0.3621062592943085, + "grad_norm": 0.24806880950927734, + "learning_rate": 2.2260630234216835e-05, + "loss": 0.0464935302734375, + "step": 5357 + }, + { + "epoch": 0.3621738542652427, + "grad_norm": 0.42794206738471985, + "learning_rate": 2.2257742822011537e-05, + "loss": 0.06705856323242188, + "step": 5358 + }, + { + "epoch": 0.36224144923617685, + "grad_norm": 1.1064741611480713, + "learning_rate": 2.2254855058631505e-05, + "loss": 0.200225830078125, + "step": 5359 + }, + { + "epoch": 0.36230904420711096, + "grad_norm": 0.5215597152709961, + "learning_rate": 2.2251966944216463e-05, + "loss": 0.099884033203125, + "step": 5360 + }, + { + "epoch": 0.36237663917804513, + "grad_norm": 0.9837132692337036, + "learning_rate": 2.2249078478906164e-05, + "loss": 0.18109893798828125, + "step": 5361 + }, + { + "epoch": 0.3624442341489793, + "grad_norm": 0.8132715821266174, + "learning_rate": 2.2246189662840368e-05, + "loss": 0.2058258056640625, + "step": 5362 + }, + { + "epoch": 0.3625118291199135, + "grad_norm": 0.1778847575187683, + "learning_rate": 2.2243300496158854e-05, + "loss": 0.0317840576171875, + "step": 5363 + }, + { + "epoch": 0.36257942409084765, + "grad_norm": 0.46507108211517334, + "learning_rate": 2.2240410979001418e-05, + "loss": 0.076904296875, + "step": 5364 + }, + { + "epoch": 0.3626470190617818, + "grad_norm": 0.8117858171463013, + "learning_rate": 2.223752111150787e-05, + "loss": 0.13714599609375, + "step": 5365 + }, + { + "epoch": 0.362714614032716, + "grad_norm": 0.9190390706062317, + "learning_rate": 2.223463089381803e-05, + "loss": 0.146270751953125, + "step": 5366 + }, + { + "epoch": 0.3627822090036501, + "grad_norm": 1.0351736545562744, + "learning_rate": 2.2231740326071766e-05, + "loss": 0.14794921875, + "step": 5367 + }, + { + "epoch": 0.3628498039745843, + "grad_norm": 0.38371574878692627, + "learning_rate": 2.2228849408408933e-05, + "loss": 0.072662353515625, + "step": 5368 + }, + { + "epoch": 0.36291739894551844, + "grad_norm": 0.4982500970363617, + "learning_rate": 2.2225958140969405e-05, + "loss": 0.0545806884765625, + "step": 5369 + }, + { + "epoch": 0.3629849939164526, + "grad_norm": 0.8099756240844727, + "learning_rate": 2.2223066523893088e-05, + "loss": 0.1156005859375, + "step": 5370 + }, + { + "epoch": 0.3630525888873868, + "grad_norm": 0.3307383060455322, + "learning_rate": 2.222017455731989e-05, + "loss": 0.08046340942382812, + "step": 5371 + }, + { + "epoch": 0.36312018385832096, + "grad_norm": 0.5422239899635315, + "learning_rate": 2.221728224138975e-05, + "loss": 0.08910369873046875, + "step": 5372 + }, + { + "epoch": 0.3631877788292551, + "grad_norm": 0.9221345782279968, + "learning_rate": 2.2214389576242604e-05, + "loss": 0.166656494140625, + "step": 5373 + }, + { + "epoch": 0.36325537380018924, + "grad_norm": 0.6795100569725037, + "learning_rate": 2.221149656201843e-05, + "loss": 0.1253814697265625, + "step": 5374 + }, + { + "epoch": 0.3633229687711234, + "grad_norm": 0.5332005620002747, + "learning_rate": 2.22086031988572e-05, + "loss": 0.1265869140625, + "step": 5375 + }, + { + "epoch": 0.3633905637420576, + "grad_norm": 1.2054606676101685, + "learning_rate": 2.220570948689892e-05, + "loss": 0.1413726806640625, + "step": 5376 + }, + { + "epoch": 0.36345815871299175, + "grad_norm": 0.5761802196502686, + "learning_rate": 2.2202815426283606e-05, + "loss": 0.102874755859375, + "step": 5377 + }, + { + "epoch": 0.3635257536839259, + "grad_norm": 0.9171550869941711, + "learning_rate": 2.2199921017151284e-05, + "loss": 0.16187286376953125, + "step": 5378 + }, + { + "epoch": 0.3635933486548601, + "grad_norm": 0.5990009307861328, + "learning_rate": 2.2197026259642004e-05, + "loss": 0.1571044921875, + "step": 5379 + }, + { + "epoch": 0.36366094362579426, + "grad_norm": 0.7942307591438293, + "learning_rate": 2.219413115389584e-05, + "loss": 0.1488037109375, + "step": 5380 + }, + { + "epoch": 0.3637285385967284, + "grad_norm": 0.37905484437942505, + "learning_rate": 2.2191235700052877e-05, + "loss": 0.07217788696289062, + "step": 5381 + }, + { + "epoch": 0.36379613356766255, + "grad_norm": 0.6043568253517151, + "learning_rate": 2.21883398982532e-05, + "loss": 0.09755706787109375, + "step": 5382 + }, + { + "epoch": 0.3638637285385967, + "grad_norm": 1.0314091444015503, + "learning_rate": 2.218544374863694e-05, + "loss": 0.135589599609375, + "step": 5383 + }, + { + "epoch": 0.3639313235095309, + "grad_norm": 0.5950282216072083, + "learning_rate": 2.218254725134422e-05, + "loss": 0.1214447021484375, + "step": 5384 + }, + { + "epoch": 0.36399891848046506, + "grad_norm": 0.8648179769515991, + "learning_rate": 2.21796504065152e-05, + "loss": 0.16131591796875, + "step": 5385 + }, + { + "epoch": 0.36406651345139923, + "grad_norm": 1.4808993339538574, + "learning_rate": 2.2176753214290048e-05, + "loss": 0.13925933837890625, + "step": 5386 + }, + { + "epoch": 0.3641341084223334, + "grad_norm": 0.6429163217544556, + "learning_rate": 2.2173855674808935e-05, + "loss": 0.09971809387207031, + "step": 5387 + }, + { + "epoch": 0.3642017033932675, + "grad_norm": 0.642757773399353, + "learning_rate": 2.2170957788212076e-05, + "loss": 0.0965423583984375, + "step": 5388 + }, + { + "epoch": 0.3642692983642017, + "grad_norm": 1.0651859045028687, + "learning_rate": 2.2168059554639684e-05, + "loss": 0.188232421875, + "step": 5389 + }, + { + "epoch": 0.36433689333513586, + "grad_norm": 0.5952459573745728, + "learning_rate": 2.2165160974231993e-05, + "loss": 0.09449005126953125, + "step": 5390 + }, + { + "epoch": 0.36440448830607003, + "grad_norm": 0.815653920173645, + "learning_rate": 2.2162262047129248e-05, + "loss": 0.1220245361328125, + "step": 5391 + }, + { + "epoch": 0.3644720832770042, + "grad_norm": 1.178053617477417, + "learning_rate": 2.2159362773471727e-05, + "loss": 0.24981689453125, + "step": 5392 + }, + { + "epoch": 0.36453967824793837, + "grad_norm": 0.7332862019538879, + "learning_rate": 2.215646315339972e-05, + "loss": 0.1495513916015625, + "step": 5393 + }, + { + "epoch": 0.36460727321887254, + "grad_norm": 0.7054765820503235, + "learning_rate": 2.215356318705351e-05, + "loss": 0.219573974609375, + "step": 5394 + }, + { + "epoch": 0.36467486818980666, + "grad_norm": 0.198336660861969, + "learning_rate": 2.2150662874573428e-05, + "loss": 0.0346527099609375, + "step": 5395 + }, + { + "epoch": 0.3647424631607408, + "grad_norm": 0.3549911379814148, + "learning_rate": 2.2147762216099807e-05, + "loss": 0.0711517333984375, + "step": 5396 + }, + { + "epoch": 0.364810058131675, + "grad_norm": 0.21080301702022552, + "learning_rate": 2.2144861211772998e-05, + "loss": 0.05335235595703125, + "step": 5397 + }, + { + "epoch": 0.36487765310260917, + "grad_norm": 0.16938501596450806, + "learning_rate": 2.214195986173337e-05, + "loss": 0.0286712646484375, + "step": 5398 + }, + { + "epoch": 0.36494524807354334, + "grad_norm": 0.6404519081115723, + "learning_rate": 2.2139058166121306e-05, + "loss": 0.1319427490234375, + "step": 5399 + }, + { + "epoch": 0.3650128430444775, + "grad_norm": 0.4483766555786133, + "learning_rate": 2.2136156125077218e-05, + "loss": 0.08284759521484375, + "step": 5400 + }, + { + "epoch": 0.3650804380154117, + "grad_norm": 0.7134947776794434, + "learning_rate": 2.213325373874151e-05, + "loss": 0.1769561767578125, + "step": 5401 + }, + { + "epoch": 0.3651480329863458, + "grad_norm": 0.6795041561126709, + "learning_rate": 2.213035100725463e-05, + "loss": 0.11377716064453125, + "step": 5402 + }, + { + "epoch": 0.36521562795727996, + "grad_norm": 0.8402004241943359, + "learning_rate": 2.2127447930757024e-05, + "loss": 0.11354827880859375, + "step": 5403 + }, + { + "epoch": 0.36528322292821414, + "grad_norm": 1.092859148979187, + "learning_rate": 2.212454450938916e-05, + "loss": 0.1302642822265625, + "step": 5404 + }, + { + "epoch": 0.3653508178991483, + "grad_norm": 0.30368873476982117, + "learning_rate": 2.2121640743291528e-05, + "loss": 0.0638885498046875, + "step": 5405 + }, + { + "epoch": 0.3654184128700825, + "grad_norm": 1.243125081062317, + "learning_rate": 2.2118736632604626e-05, + "loss": 0.212982177734375, + "step": 5406 + }, + { + "epoch": 0.36548600784101665, + "grad_norm": 0.4491220712661743, + "learning_rate": 2.2115832177468974e-05, + "loss": 0.07859039306640625, + "step": 5407 + }, + { + "epoch": 0.3655536028119508, + "grad_norm": 0.9686809778213501, + "learning_rate": 2.211292737802511e-05, + "loss": 0.194580078125, + "step": 5408 + }, + { + "epoch": 0.36562119778288493, + "grad_norm": 0.7044020295143127, + "learning_rate": 2.2110022234413587e-05, + "loss": 0.141998291015625, + "step": 5409 + }, + { + "epoch": 0.3656887927538191, + "grad_norm": 0.3432333171367645, + "learning_rate": 2.2107116746774967e-05, + "loss": 0.05712890625, + "step": 5410 + }, + { + "epoch": 0.3657563877247533, + "grad_norm": 0.5239614844322205, + "learning_rate": 2.210421091524984e-05, + "loss": 0.1262054443359375, + "step": 5411 + }, + { + "epoch": 0.36582398269568744, + "grad_norm": 1.1477397680282593, + "learning_rate": 2.210130473997881e-05, + "loss": 0.19366455078125, + "step": 5412 + }, + { + "epoch": 0.3658915776666216, + "grad_norm": 0.7447606325149536, + "learning_rate": 2.2098398221102494e-05, + "loss": 0.12896728515625, + "step": 5413 + }, + { + "epoch": 0.3659591726375558, + "grad_norm": 0.7055654525756836, + "learning_rate": 2.209549135876153e-05, + "loss": 0.09247589111328125, + "step": 5414 + }, + { + "epoch": 0.36602676760848996, + "grad_norm": 0.3614010512828827, + "learning_rate": 2.2092584153096566e-05, + "loss": 0.08699798583984375, + "step": 5415 + }, + { + "epoch": 0.36609436257942407, + "grad_norm": 1.2920087575912476, + "learning_rate": 2.2089676604248274e-05, + "loss": 0.22873687744140625, + "step": 5416 + }, + { + "epoch": 0.36616195755035824, + "grad_norm": 1.2419155836105347, + "learning_rate": 2.2086768712357334e-05, + "loss": 0.1837615966796875, + "step": 5417 + }, + { + "epoch": 0.3662295525212924, + "grad_norm": 0.4733882546424866, + "learning_rate": 2.2083860477564454e-05, + "loss": 0.0764312744140625, + "step": 5418 + }, + { + "epoch": 0.3662971474922266, + "grad_norm": 0.387825608253479, + "learning_rate": 2.208095190001035e-05, + "loss": 0.0826568603515625, + "step": 5419 + }, + { + "epoch": 0.36636474246316075, + "grad_norm": 0.27093982696533203, + "learning_rate": 2.2078042979835753e-05, + "loss": 0.026185989379882812, + "step": 5420 + }, + { + "epoch": 0.3664323374340949, + "grad_norm": 0.5446076393127441, + "learning_rate": 2.2075133717181426e-05, + "loss": 0.1070556640625, + "step": 5421 + }, + { + "epoch": 0.36649993240502904, + "grad_norm": 0.6298550963401794, + "learning_rate": 2.2072224112188126e-05, + "loss": 0.11357498168945312, + "step": 5422 + }, + { + "epoch": 0.3665675273759632, + "grad_norm": 0.36525580286979675, + "learning_rate": 2.206931416499664e-05, + "loss": 0.0783538818359375, + "step": 5423 + }, + { + "epoch": 0.3666351223468974, + "grad_norm": 0.4466531276702881, + "learning_rate": 2.206640387574777e-05, + "loss": 0.085052490234375, + "step": 5424 + }, + { + "epoch": 0.36670271731783155, + "grad_norm": 0.5590576529502869, + "learning_rate": 2.206349324458234e-05, + "loss": 0.129547119140625, + "step": 5425 + }, + { + "epoch": 0.3667703122887657, + "grad_norm": 0.902271568775177, + "learning_rate": 2.206058227164118e-05, + "loss": 0.1630706787109375, + "step": 5426 + }, + { + "epoch": 0.3668379072596999, + "grad_norm": 0.3465512990951538, + "learning_rate": 2.2057670957065138e-05, + "loss": 0.06845855712890625, + "step": 5427 + }, + { + "epoch": 0.36690550223063406, + "grad_norm": 1.4985343217849731, + "learning_rate": 2.205475930099508e-05, + "loss": 0.182647705078125, + "step": 5428 + }, + { + "epoch": 0.3669730972015682, + "grad_norm": 0.1614229828119278, + "learning_rate": 2.2051847303571902e-05, + "loss": 0.031429290771484375, + "step": 5429 + }, + { + "epoch": 0.36704069217250235, + "grad_norm": 0.5638197660446167, + "learning_rate": 2.204893496493649e-05, + "loss": 0.083404541015625, + "step": 5430 + }, + { + "epoch": 0.3671082871434365, + "grad_norm": 1.0105267763137817, + "learning_rate": 2.2046022285229772e-05, + "loss": 0.207366943359375, + "step": 5431 + }, + { + "epoch": 0.3671758821143707, + "grad_norm": 0.40821823477745056, + "learning_rate": 2.2043109264592676e-05, + "loss": 0.092132568359375, + "step": 5432 + }, + { + "epoch": 0.36724347708530486, + "grad_norm": 1.581365704536438, + "learning_rate": 2.204019590316615e-05, + "loss": 0.16083526611328125, + "step": 5433 + }, + { + "epoch": 0.36731107205623903, + "grad_norm": 0.6768798232078552, + "learning_rate": 2.2037282201091162e-05, + "loss": 0.16130828857421875, + "step": 5434 + }, + { + "epoch": 0.3673786670271732, + "grad_norm": 0.7820203900337219, + "learning_rate": 2.2034368158508704e-05, + "loss": 0.1615447998046875, + "step": 5435 + }, + { + "epoch": 0.3674462619981073, + "grad_norm": 0.29930752515792847, + "learning_rate": 2.2031453775559764e-05, + "loss": 0.0499267578125, + "step": 5436 + }, + { + "epoch": 0.3675138569690415, + "grad_norm": 1.1835190057754517, + "learning_rate": 2.202853905238536e-05, + "loss": 0.238555908203125, + "step": 5437 + }, + { + "epoch": 0.36758145193997566, + "grad_norm": 0.839288055896759, + "learning_rate": 2.202562398912653e-05, + "loss": 0.161834716796875, + "step": 5438 + }, + { + "epoch": 0.3676490469109098, + "grad_norm": 0.822567343711853, + "learning_rate": 2.2022708585924318e-05, + "loss": 0.138336181640625, + "step": 5439 + }, + { + "epoch": 0.367716641881844, + "grad_norm": 0.47481459379196167, + "learning_rate": 2.2019792842919796e-05, + "loss": 0.09765625, + "step": 5440 + }, + { + "epoch": 0.36778423685277817, + "grad_norm": 1.1924697160720825, + "learning_rate": 2.2016876760254036e-05, + "loss": 0.1597442626953125, + "step": 5441 + }, + { + "epoch": 0.36785183182371234, + "grad_norm": 0.5877181887626648, + "learning_rate": 2.2013960338068142e-05, + "loss": 0.1327972412109375, + "step": 5442 + }, + { + "epoch": 0.36791942679464645, + "grad_norm": 0.6068930625915527, + "learning_rate": 2.201104357650323e-05, + "loss": 0.12410354614257812, + "step": 5443 + }, + { + "epoch": 0.3679870217655806, + "grad_norm": 0.38119035959243774, + "learning_rate": 2.200812647570042e-05, + "loss": 0.0639495849609375, + "step": 5444 + }, + { + "epoch": 0.3680546167365148, + "grad_norm": 0.20048341155052185, + "learning_rate": 2.200520903580087e-05, + "loss": 0.038875579833984375, + "step": 5445 + }, + { + "epoch": 0.36812221170744897, + "grad_norm": 0.3975336253643036, + "learning_rate": 2.2002291256945747e-05, + "loss": 0.109649658203125, + "step": 5446 + }, + { + "epoch": 0.36818980667838314, + "grad_norm": 0.8811437487602234, + "learning_rate": 2.1999373139276226e-05, + "loss": 0.14171600341796875, + "step": 5447 + }, + { + "epoch": 0.3682574016493173, + "grad_norm": 0.3071044385433197, + "learning_rate": 2.1996454682933503e-05, + "loss": 0.06270599365234375, + "step": 5448 + }, + { + "epoch": 0.3683249966202515, + "grad_norm": 0.537386953830719, + "learning_rate": 2.1993535888058793e-05, + "loss": 0.10826873779296875, + "step": 5449 + }, + { + "epoch": 0.3683925915911856, + "grad_norm": 0.24991169571876526, + "learning_rate": 2.199061675479332e-05, + "loss": 0.0407867431640625, + "step": 5450 + }, + { + "epoch": 0.36846018656211976, + "grad_norm": 0.7692686915397644, + "learning_rate": 2.198769728327834e-05, + "loss": 0.13954925537109375, + "step": 5451 + }, + { + "epoch": 0.36852778153305393, + "grad_norm": 0.8478180766105652, + "learning_rate": 2.1984777473655105e-05, + "loss": 0.127716064453125, + "step": 5452 + }, + { + "epoch": 0.3685953765039881, + "grad_norm": 0.9322066307067871, + "learning_rate": 2.19818573260649e-05, + "loss": 0.17767333984375, + "step": 5453 + }, + { + "epoch": 0.3686629714749223, + "grad_norm": 0.7344959378242493, + "learning_rate": 2.1978936840649015e-05, + "loss": 0.18975830078125, + "step": 5454 + }, + { + "epoch": 0.36873056644585644, + "grad_norm": 0.324031800031662, + "learning_rate": 2.1976016017548766e-05, + "loss": 0.0597991943359375, + "step": 5455 + }, + { + "epoch": 0.3687981614167906, + "grad_norm": 0.47402623295783997, + "learning_rate": 2.1973094856905484e-05, + "loss": 0.12384033203125, + "step": 5456 + }, + { + "epoch": 0.36886575638772473, + "grad_norm": 0.22519174218177795, + "learning_rate": 2.19701733588605e-05, + "loss": 0.038684844970703125, + "step": 5457 + }, + { + "epoch": 0.3689333513586589, + "grad_norm": 0.8670945763587952, + "learning_rate": 2.1967251523555183e-05, + "loss": 0.1212615966796875, + "step": 5458 + }, + { + "epoch": 0.36900094632959307, + "grad_norm": 0.5269327759742737, + "learning_rate": 2.1964329351130907e-05, + "loss": 0.10014724731445312, + "step": 5459 + }, + { + "epoch": 0.36906854130052724, + "grad_norm": 0.37109673023223877, + "learning_rate": 2.1961406841729074e-05, + "loss": 0.06497955322265625, + "step": 5460 + }, + { + "epoch": 0.3691361362714614, + "grad_norm": 0.4335831105709076, + "learning_rate": 2.195848399549108e-05, + "loss": 0.077728271484375, + "step": 5461 + }, + { + "epoch": 0.3692037312423956, + "grad_norm": 1.093798041343689, + "learning_rate": 2.195556081255835e-05, + "loss": 0.1366424560546875, + "step": 5462 + }, + { + "epoch": 0.36927132621332975, + "grad_norm": 0.7087734341621399, + "learning_rate": 2.1952637293072344e-05, + "loss": 0.14617156982421875, + "step": 5463 + }, + { + "epoch": 0.36933892118426387, + "grad_norm": 0.20289431512355804, + "learning_rate": 2.1949713437174504e-05, + "loss": 0.026294708251953125, + "step": 5464 + }, + { + "epoch": 0.36940651615519804, + "grad_norm": 0.7697696089744568, + "learning_rate": 2.1946789245006304e-05, + "loss": 0.1563720703125, + "step": 5465 + }, + { + "epoch": 0.3694741111261322, + "grad_norm": 0.719605565071106, + "learning_rate": 2.1943864716709237e-05, + "loss": 0.10018157958984375, + "step": 5466 + }, + { + "epoch": 0.3695417060970664, + "grad_norm": 0.38031214475631714, + "learning_rate": 2.194093985242482e-05, + "loss": 0.1012115478515625, + "step": 5467 + }, + { + "epoch": 0.36960930106800055, + "grad_norm": 0.9281715154647827, + "learning_rate": 2.1938014652294564e-05, + "loss": 0.182830810546875, + "step": 5468 + }, + { + "epoch": 0.3696768960389347, + "grad_norm": 0.3289642632007599, + "learning_rate": 2.193508911646001e-05, + "loss": 0.083740234375, + "step": 5469 + }, + { + "epoch": 0.3697444910098689, + "grad_norm": 0.27849575877189636, + "learning_rate": 2.1932163245062714e-05, + "loss": 0.06439208984375, + "step": 5470 + }, + { + "epoch": 0.369812085980803, + "grad_norm": 0.8719614148139954, + "learning_rate": 2.1929237038244254e-05, + "loss": 0.1237030029296875, + "step": 5471 + }, + { + "epoch": 0.3698796809517372, + "grad_norm": 0.5461570024490356, + "learning_rate": 2.1926310496146213e-05, + "loss": 0.1106719970703125, + "step": 5472 + }, + { + "epoch": 0.36994727592267135, + "grad_norm": 0.7735540270805359, + "learning_rate": 2.1923383618910194e-05, + "loss": 0.151336669921875, + "step": 5473 + }, + { + "epoch": 0.3700148708936055, + "grad_norm": 0.2760660648345947, + "learning_rate": 2.1920456406677822e-05, + "loss": 0.045162200927734375, + "step": 5474 + }, + { + "epoch": 0.3700824658645397, + "grad_norm": 0.730156421661377, + "learning_rate": 2.1917528859590727e-05, + "loss": 0.1319122314453125, + "step": 5475 + }, + { + "epoch": 0.37015006083547386, + "grad_norm": 0.4550454914569855, + "learning_rate": 2.191460097779057e-05, + "loss": 0.05859565734863281, + "step": 5476 + }, + { + "epoch": 0.37021765580640803, + "grad_norm": 0.44521304965019226, + "learning_rate": 2.191167276141902e-05, + "loss": 0.08121490478515625, + "step": 5477 + }, + { + "epoch": 0.37028525077734215, + "grad_norm": 0.39617103338241577, + "learning_rate": 2.190874421061775e-05, + "loss": 0.04766845703125, + "step": 5478 + }, + { + "epoch": 0.3703528457482763, + "grad_norm": 0.31215181946754456, + "learning_rate": 2.190581532552847e-05, + "loss": 0.046627044677734375, + "step": 5479 + }, + { + "epoch": 0.3704204407192105, + "grad_norm": 0.59786456823349, + "learning_rate": 2.1902886106292906e-05, + "loss": 0.110687255859375, + "step": 5480 + }, + { + "epoch": 0.37048803569014466, + "grad_norm": 0.5889852643013, + "learning_rate": 2.189995655305278e-05, + "loss": 0.11690521240234375, + "step": 5481 + }, + { + "epoch": 0.3705556306610788, + "grad_norm": 0.1992567479610443, + "learning_rate": 2.189702666594984e-05, + "loss": 0.038959503173828125, + "step": 5482 + }, + { + "epoch": 0.370623225632013, + "grad_norm": 0.18737877905368805, + "learning_rate": 2.1894096445125863e-05, + "loss": 0.028621673583984375, + "step": 5483 + }, + { + "epoch": 0.3706908206029471, + "grad_norm": 0.6514539122581482, + "learning_rate": 2.1891165890722628e-05, + "loss": 0.1501922607421875, + "step": 5484 + }, + { + "epoch": 0.3707584155738813, + "grad_norm": 0.4762742519378662, + "learning_rate": 2.188823500288193e-05, + "loss": 0.08289337158203125, + "step": 5485 + }, + { + "epoch": 0.37082601054481545, + "grad_norm": 0.39258986711502075, + "learning_rate": 2.1885303781745586e-05, + "loss": 0.07349777221679688, + "step": 5486 + }, + { + "epoch": 0.3708936055157496, + "grad_norm": 1.1132797002792358, + "learning_rate": 2.188237222745542e-05, + "loss": 0.1836395263671875, + "step": 5487 + }, + { + "epoch": 0.3709612004866838, + "grad_norm": 1.3103986978530884, + "learning_rate": 2.187944034015329e-05, + "loss": 0.257293701171875, + "step": 5488 + }, + { + "epoch": 0.37102879545761797, + "grad_norm": 0.36131802201271057, + "learning_rate": 2.1876508119981053e-05, + "loss": 0.07001495361328125, + "step": 5489 + }, + { + "epoch": 0.37109639042855214, + "grad_norm": 0.5452786087989807, + "learning_rate": 2.1873575567080584e-05, + "loss": 0.14252853393554688, + "step": 5490 + }, + { + "epoch": 0.37116398539948625, + "grad_norm": 0.24872766435146332, + "learning_rate": 2.1870642681593787e-05, + "loss": 0.0423126220703125, + "step": 5491 + }, + { + "epoch": 0.3712315803704204, + "grad_norm": 0.6193084120750427, + "learning_rate": 2.1867709463662568e-05, + "loss": 0.1626129150390625, + "step": 5492 + }, + { + "epoch": 0.3712991753413546, + "grad_norm": 0.6171354651451111, + "learning_rate": 2.1864775913428857e-05, + "loss": 0.13531494140625, + "step": 5493 + }, + { + "epoch": 0.37136677031228876, + "grad_norm": 0.7258888483047485, + "learning_rate": 2.1861842031034596e-05, + "loss": 0.15079498291015625, + "step": 5494 + }, + { + "epoch": 0.37143436528322293, + "grad_norm": 0.6354297399520874, + "learning_rate": 2.185890781662174e-05, + "loss": 0.160369873046875, + "step": 5495 + }, + { + "epoch": 0.3715019602541571, + "grad_norm": 0.5672199726104736, + "learning_rate": 2.185597327033227e-05, + "loss": 0.08198165893554688, + "step": 5496 + }, + { + "epoch": 0.3715695552250913, + "grad_norm": 0.45028218626976013, + "learning_rate": 2.1853038392308178e-05, + "loss": 0.07489013671875, + "step": 5497 + }, + { + "epoch": 0.3716371501960254, + "grad_norm": 0.387972891330719, + "learning_rate": 2.1850103182691472e-05, + "loss": 0.10155105590820312, + "step": 5498 + }, + { + "epoch": 0.37170474516695956, + "grad_norm": 0.6950820088386536, + "learning_rate": 2.184716764162417e-05, + "loss": 0.162261962890625, + "step": 5499 + }, + { + "epoch": 0.37177234013789373, + "grad_norm": 0.5301383137702942, + "learning_rate": 2.184423176924832e-05, + "loss": 0.1089630126953125, + "step": 5500 + }, + { + "epoch": 0.3718399351088279, + "grad_norm": 0.8994618654251099, + "learning_rate": 2.184129556570597e-05, + "loss": 0.21844482421875, + "step": 5501 + }, + { + "epoch": 0.37190753007976207, + "grad_norm": 0.34819692373275757, + "learning_rate": 2.1838359031139193e-05, + "loss": 0.07171630859375, + "step": 5502 + }, + { + "epoch": 0.37197512505069624, + "grad_norm": 0.3539295494556427, + "learning_rate": 2.1835422165690085e-05, + "loss": 0.0674591064453125, + "step": 5503 + }, + { + "epoch": 0.3720427200216304, + "grad_norm": 0.5153194665908813, + "learning_rate": 2.1832484969500733e-05, + "loss": 0.125213623046875, + "step": 5504 + }, + { + "epoch": 0.37211031499256453, + "grad_norm": 0.3701818287372589, + "learning_rate": 2.1829547442713276e-05, + "loss": 0.068511962890625, + "step": 5505 + }, + { + "epoch": 0.3721779099634987, + "grad_norm": 0.8922320604324341, + "learning_rate": 2.1826609585469837e-05, + "loss": 0.13614654541015625, + "step": 5506 + }, + { + "epoch": 0.37224550493443287, + "grad_norm": 0.7534810900688171, + "learning_rate": 2.1823671397912575e-05, + "loss": 0.1522674560546875, + "step": 5507 + }, + { + "epoch": 0.37231309990536704, + "grad_norm": 0.8847188949584961, + "learning_rate": 2.1820732880183652e-05, + "loss": 0.160247802734375, + "step": 5508 + }, + { + "epoch": 0.3723806948763012, + "grad_norm": 1.2206852436065674, + "learning_rate": 2.1817794032425258e-05, + "loss": 0.1501922607421875, + "step": 5509 + }, + { + "epoch": 0.3724482898472354, + "grad_norm": 0.7980057597160339, + "learning_rate": 2.181485485477959e-05, + "loss": 0.14410400390625, + "step": 5510 + }, + { + "epoch": 0.37251588481816955, + "grad_norm": 0.623542845249176, + "learning_rate": 2.1811915347388864e-05, + "loss": 0.1766357421875, + "step": 5511 + }, + { + "epoch": 0.37258347978910367, + "grad_norm": 0.47827380895614624, + "learning_rate": 2.1808975510395304e-05, + "loss": 0.07981109619140625, + "step": 5512 + }, + { + "epoch": 0.37265107476003784, + "grad_norm": 0.8035908341407776, + "learning_rate": 2.1806035343941164e-05, + "loss": 0.1224365234375, + "step": 5513 + }, + { + "epoch": 0.372718669730972, + "grad_norm": 0.6799967885017395, + "learning_rate": 2.180309484816871e-05, + "loss": 0.132415771484375, + "step": 5514 + }, + { + "epoch": 0.3727862647019062, + "grad_norm": 0.9726617336273193, + "learning_rate": 2.1800154023220225e-05, + "loss": 0.12601089477539062, + "step": 5515 + }, + { + "epoch": 0.37285385967284035, + "grad_norm": 1.0024011135101318, + "learning_rate": 2.1797212869237992e-05, + "loss": 0.1919403076171875, + "step": 5516 + }, + { + "epoch": 0.3729214546437745, + "grad_norm": 0.5503188967704773, + "learning_rate": 2.1794271386364334e-05, + "loss": 0.102813720703125, + "step": 5517 + }, + { + "epoch": 0.3729890496147087, + "grad_norm": 0.7683295011520386, + "learning_rate": 2.179132957474157e-05, + "loss": 0.1437225341796875, + "step": 5518 + }, + { + "epoch": 0.3730566445856428, + "grad_norm": 0.23562881350517273, + "learning_rate": 2.1788387434512046e-05, + "loss": 0.028079986572265625, + "step": 5519 + }, + { + "epoch": 0.373124239556577, + "grad_norm": 1.0380698442459106, + "learning_rate": 2.178544496581812e-05, + "loss": 0.21868896484375, + "step": 5520 + }, + { + "epoch": 0.37319183452751115, + "grad_norm": 0.9059411287307739, + "learning_rate": 2.178250216880217e-05, + "loss": 0.1586456298828125, + "step": 5521 + }, + { + "epoch": 0.3732594294984453, + "grad_norm": 0.6756942272186279, + "learning_rate": 2.1779559043606586e-05, + "loss": 0.164215087890625, + "step": 5522 + }, + { + "epoch": 0.3733270244693795, + "grad_norm": 0.3826853632926941, + "learning_rate": 2.1776615590373777e-05, + "loss": 0.0756988525390625, + "step": 5523 + }, + { + "epoch": 0.37339461944031366, + "grad_norm": 0.8537523150444031, + "learning_rate": 2.1773671809246162e-05, + "loss": 0.12361526489257812, + "step": 5524 + }, + { + "epoch": 0.37346221441124783, + "grad_norm": 0.6386446952819824, + "learning_rate": 2.1770727700366172e-05, + "loss": 0.11496734619140625, + "step": 5525 + }, + { + "epoch": 0.37352980938218194, + "grad_norm": 1.1485204696655273, + "learning_rate": 2.1767783263876274e-05, + "loss": 0.15791702270507812, + "step": 5526 + }, + { + "epoch": 0.3735974043531161, + "grad_norm": 0.43234020471572876, + "learning_rate": 2.1764838499918933e-05, + "loss": 0.03749275207519531, + "step": 5527 + }, + { + "epoch": 0.3736649993240503, + "grad_norm": 1.5582728385925293, + "learning_rate": 2.1761893408636636e-05, + "loss": 0.1998291015625, + "step": 5528 + }, + { + "epoch": 0.37373259429498445, + "grad_norm": 0.7578460574150085, + "learning_rate": 2.175894799017188e-05, + "loss": 0.1626129150390625, + "step": 5529 + }, + { + "epoch": 0.3738001892659186, + "grad_norm": 0.4162576496601105, + "learning_rate": 2.1756002244667193e-05, + "loss": 0.07781219482421875, + "step": 5530 + }, + { + "epoch": 0.3738677842368528, + "grad_norm": 0.7472171783447266, + "learning_rate": 2.17530561722651e-05, + "loss": 0.1546173095703125, + "step": 5531 + }, + { + "epoch": 0.37393537920778697, + "grad_norm": 0.3618927597999573, + "learning_rate": 2.1750109773108152e-05, + "loss": 0.08893394470214844, + "step": 5532 + }, + { + "epoch": 0.3740029741787211, + "grad_norm": 0.6166192293167114, + "learning_rate": 2.1747163047338916e-05, + "loss": 0.112701416015625, + "step": 5533 + }, + { + "epoch": 0.37407056914965525, + "grad_norm": 0.5429927706718445, + "learning_rate": 2.174421599509997e-05, + "loss": 0.1131591796875, + "step": 5534 + }, + { + "epoch": 0.3741381641205894, + "grad_norm": 0.3765300214290619, + "learning_rate": 2.1741268616533914e-05, + "loss": 0.06947708129882812, + "step": 5535 + }, + { + "epoch": 0.3742057590915236, + "grad_norm": 0.23433610796928406, + "learning_rate": 2.1738320911783357e-05, + "loss": 0.051372528076171875, + "step": 5536 + }, + { + "epoch": 0.37427335406245776, + "grad_norm": 0.2561943829059601, + "learning_rate": 2.173537288099093e-05, + "loss": 0.05786895751953125, + "step": 5537 + }, + { + "epoch": 0.37434094903339193, + "grad_norm": 0.9769989252090454, + "learning_rate": 2.1732424524299277e-05, + "loss": 0.140594482421875, + "step": 5538 + }, + { + "epoch": 0.3744085440043261, + "grad_norm": 0.7181942462921143, + "learning_rate": 2.1729475841851055e-05, + "loss": 0.10214996337890625, + "step": 5539 + }, + { + "epoch": 0.3744761389752602, + "grad_norm": 0.631994366645813, + "learning_rate": 2.1726526833788944e-05, + "loss": 0.13788223266601562, + "step": 5540 + }, + { + "epoch": 0.3745437339461944, + "grad_norm": 0.7175986766815186, + "learning_rate": 2.172357750025563e-05, + "loss": 0.1333160400390625, + "step": 5541 + }, + { + "epoch": 0.37461132891712856, + "grad_norm": 0.7162321209907532, + "learning_rate": 2.1720627841393823e-05, + "loss": 0.191619873046875, + "step": 5542 + }, + { + "epoch": 0.37467892388806273, + "grad_norm": 0.9609103202819824, + "learning_rate": 2.171767785734625e-05, + "loss": 0.21673583984375, + "step": 5543 + }, + { + "epoch": 0.3747465188589969, + "grad_norm": 0.3457437753677368, + "learning_rate": 2.1714727548255642e-05, + "loss": 0.0847320556640625, + "step": 5544 + }, + { + "epoch": 0.3748141138299311, + "grad_norm": 0.3409331142902374, + "learning_rate": 2.171177691426476e-05, + "loss": 0.07332992553710938, + "step": 5545 + }, + { + "epoch": 0.37488170880086524, + "grad_norm": 0.5439273118972778, + "learning_rate": 2.170882595551637e-05, + "loss": 0.0996551513671875, + "step": 5546 + }, + { + "epoch": 0.37494930377179936, + "grad_norm": 0.3574322760105133, + "learning_rate": 2.1705874672153255e-05, + "loss": 0.09243011474609375, + "step": 5547 + }, + { + "epoch": 0.37501689874273353, + "grad_norm": 0.37783706188201904, + "learning_rate": 2.1702923064318222e-05, + "loss": 0.07646751403808594, + "step": 5548 + }, + { + "epoch": 0.3750844937136677, + "grad_norm": 0.2685907483100891, + "learning_rate": 2.1699971132154087e-05, + "loss": 0.05841064453125, + "step": 5549 + }, + { + "epoch": 0.37515208868460187, + "grad_norm": 0.6653422713279724, + "learning_rate": 2.169701887580368e-05, + "loss": 0.13702392578125, + "step": 5550 + }, + { + "epoch": 0.37521968365553604, + "grad_norm": 0.32705631852149963, + "learning_rate": 2.1694066295409852e-05, + "loss": 0.0796051025390625, + "step": 5551 + }, + { + "epoch": 0.3752872786264702, + "grad_norm": 0.4073350727558136, + "learning_rate": 2.1691113391115466e-05, + "loss": 0.08577346801757812, + "step": 5552 + }, + { + "epoch": 0.3753548735974043, + "grad_norm": 1.4248698949813843, + "learning_rate": 2.1688160163063404e-05, + "loss": 0.20647430419921875, + "step": 5553 + }, + { + "epoch": 0.3754224685683385, + "grad_norm": 1.759081244468689, + "learning_rate": 2.1685206611396557e-05, + "loss": 0.239654541015625, + "step": 5554 + }, + { + "epoch": 0.37549006353927267, + "grad_norm": 0.691573441028595, + "learning_rate": 2.1682252736257846e-05, + "loss": 0.108428955078125, + "step": 5555 + }, + { + "epoch": 0.37555765851020684, + "grad_norm": 0.4497806429862976, + "learning_rate": 2.1679298537790185e-05, + "loss": 0.101348876953125, + "step": 5556 + }, + { + "epoch": 0.375625253481141, + "grad_norm": 0.37938567996025085, + "learning_rate": 2.1676344016136528e-05, + "loss": 0.08482742309570312, + "step": 5557 + }, + { + "epoch": 0.3756928484520752, + "grad_norm": 0.5036889910697937, + "learning_rate": 2.1673389171439826e-05, + "loss": 0.16571044921875, + "step": 5558 + }, + { + "epoch": 0.37576044342300935, + "grad_norm": 0.28122252225875854, + "learning_rate": 2.1670434003843054e-05, + "loss": 0.043346405029296875, + "step": 5559 + }, + { + "epoch": 0.37582803839394346, + "grad_norm": 1.3024243116378784, + "learning_rate": 2.16674785134892e-05, + "loss": 0.164306640625, + "step": 5560 + }, + { + "epoch": 0.37589563336487763, + "grad_norm": 0.4034838080406189, + "learning_rate": 2.166452270052127e-05, + "loss": 0.0821990966796875, + "step": 5561 + }, + { + "epoch": 0.3759632283358118, + "grad_norm": 0.36497756838798523, + "learning_rate": 2.166156656508229e-05, + "loss": 0.077545166015625, + "step": 5562 + }, + { + "epoch": 0.376030823306746, + "grad_norm": 0.8204675316810608, + "learning_rate": 2.165861010731529e-05, + "loss": 0.155548095703125, + "step": 5563 + }, + { + "epoch": 0.37609841827768015, + "grad_norm": 0.7047937512397766, + "learning_rate": 2.1655653327363324e-05, + "loss": 0.1597442626953125, + "step": 5564 + }, + { + "epoch": 0.3761660132486143, + "grad_norm": 0.7855428457260132, + "learning_rate": 2.1652696225369456e-05, + "loss": 0.1639556884765625, + "step": 5565 + }, + { + "epoch": 0.3762336082195485, + "grad_norm": 0.3408209979534149, + "learning_rate": 2.1649738801476775e-05, + "loss": 0.045318603515625, + "step": 5566 + }, + { + "epoch": 0.3763012031904826, + "grad_norm": 0.8771585822105408, + "learning_rate": 2.164678105582837e-05, + "loss": 0.1592559814453125, + "step": 5567 + }, + { + "epoch": 0.3763687981614168, + "grad_norm": 0.20391200482845306, + "learning_rate": 2.1643822988567373e-05, + "loss": 0.05185699462890625, + "step": 5568 + }, + { + "epoch": 0.37643639313235094, + "grad_norm": 0.2582246661186218, + "learning_rate": 2.1640864599836898e-05, + "loss": 0.0396728515625, + "step": 5569 + }, + { + "epoch": 0.3765039881032851, + "grad_norm": 0.11611687391996384, + "learning_rate": 2.1637905889780093e-05, + "loss": 0.017732620239257812, + "step": 5570 + }, + { + "epoch": 0.3765715830742193, + "grad_norm": 1.0285892486572266, + "learning_rate": 2.1634946858540118e-05, + "loss": 0.166595458984375, + "step": 5571 + }, + { + "epoch": 0.37663917804515346, + "grad_norm": 0.4120810627937317, + "learning_rate": 2.1631987506260154e-05, + "loss": 0.06665802001953125, + "step": 5572 + }, + { + "epoch": 0.3767067730160876, + "grad_norm": 0.4394300878047943, + "learning_rate": 2.1629027833083388e-05, + "loss": 0.0802764892578125, + "step": 5573 + }, + { + "epoch": 0.37677436798702174, + "grad_norm": 1.07937753200531, + "learning_rate": 2.1626067839153036e-05, + "loss": 0.141571044921875, + "step": 5574 + }, + { + "epoch": 0.3768419629579559, + "grad_norm": 1.098175048828125, + "learning_rate": 2.1623107524612308e-05, + "loss": 0.195098876953125, + "step": 5575 + }, + { + "epoch": 0.3769095579288901, + "grad_norm": 0.38281771540641785, + "learning_rate": 2.162014688960445e-05, + "loss": 0.09649848937988281, + "step": 5576 + }, + { + "epoch": 0.37697715289982425, + "grad_norm": 0.2712440490722656, + "learning_rate": 2.1617185934272722e-05, + "loss": 0.039920806884765625, + "step": 5577 + }, + { + "epoch": 0.3770447478707584, + "grad_norm": 0.9259233474731445, + "learning_rate": 2.161422465876038e-05, + "loss": 0.1411895751953125, + "step": 5578 + }, + { + "epoch": 0.3771123428416926, + "grad_norm": 1.2501869201660156, + "learning_rate": 2.161126306321072e-05, + "loss": 0.216156005859375, + "step": 5579 + }, + { + "epoch": 0.37717993781262676, + "grad_norm": 1.025457739830017, + "learning_rate": 2.1608301147767036e-05, + "loss": 0.1759490966796875, + "step": 5580 + }, + { + "epoch": 0.3772475327835609, + "grad_norm": 0.816177248954773, + "learning_rate": 2.160533891257265e-05, + "loss": 0.1734619140625, + "step": 5581 + }, + { + "epoch": 0.37731512775449505, + "grad_norm": 0.5341313481330872, + "learning_rate": 2.160237635777088e-05, + "loss": 0.09996795654296875, + "step": 5582 + }, + { + "epoch": 0.3773827227254292, + "grad_norm": 0.333137571811676, + "learning_rate": 2.159941348350509e-05, + "loss": 0.05730438232421875, + "step": 5583 + }, + { + "epoch": 0.3774503176963634, + "grad_norm": 1.1991163492202759, + "learning_rate": 2.1596450289918628e-05, + "loss": 0.2166748046875, + "step": 5584 + }, + { + "epoch": 0.37751791266729756, + "grad_norm": 1.2171666622161865, + "learning_rate": 2.1593486777154885e-05, + "loss": 0.1368408203125, + "step": 5585 + }, + { + "epoch": 0.37758550763823173, + "grad_norm": 1.4673696756362915, + "learning_rate": 2.1590522945357246e-05, + "loss": 0.25537109375, + "step": 5586 + }, + { + "epoch": 0.3776531026091659, + "grad_norm": 2.239105463027954, + "learning_rate": 2.1587558794669118e-05, + "loss": 0.1693572998046875, + "step": 5587 + }, + { + "epoch": 0.3777206975801, + "grad_norm": 0.6480342745780945, + "learning_rate": 2.1584594325233926e-05, + "loss": 0.13458251953125, + "step": 5588 + }, + { + "epoch": 0.3777882925510342, + "grad_norm": 0.2815629243850708, + "learning_rate": 2.1581629537195115e-05, + "loss": 0.050662994384765625, + "step": 5589 + }, + { + "epoch": 0.37785588752196836, + "grad_norm": 0.3922715485095978, + "learning_rate": 2.1578664430696134e-05, + "loss": 0.069061279296875, + "step": 5590 + }, + { + "epoch": 0.37792348249290253, + "grad_norm": 0.6134368777275085, + "learning_rate": 2.157569900588046e-05, + "loss": 0.10877227783203125, + "step": 5591 + }, + { + "epoch": 0.3779910774638367, + "grad_norm": 1.6864856481552124, + "learning_rate": 2.1572733262891567e-05, + "loss": 0.155670166015625, + "step": 5592 + }, + { + "epoch": 0.37805867243477087, + "grad_norm": 0.7777236104011536, + "learning_rate": 2.156976720187297e-05, + "loss": 0.1091461181640625, + "step": 5593 + }, + { + "epoch": 0.37812626740570504, + "grad_norm": 0.45005902647972107, + "learning_rate": 2.156680082296818e-05, + "loss": 0.0950927734375, + "step": 5594 + }, + { + "epoch": 0.37819386237663916, + "grad_norm": 1.1631945371627808, + "learning_rate": 2.1563834126320725e-05, + "loss": 0.182037353515625, + "step": 5595 + }, + { + "epoch": 0.3782614573475733, + "grad_norm": 0.467825710773468, + "learning_rate": 2.156086711207415e-05, + "loss": 0.08467864990234375, + "step": 5596 + }, + { + "epoch": 0.3783290523185075, + "grad_norm": 0.602304995059967, + "learning_rate": 2.1557899780372027e-05, + "loss": 0.12570953369140625, + "step": 5597 + }, + { + "epoch": 0.37839664728944167, + "grad_norm": 0.6327909231185913, + "learning_rate": 2.1554932131357927e-05, + "loss": 0.1111907958984375, + "step": 5598 + }, + { + "epoch": 0.37846424226037584, + "grad_norm": 0.35140860080718994, + "learning_rate": 2.155196416517545e-05, + "loss": 0.095794677734375, + "step": 5599 + }, + { + "epoch": 0.37853183723131, + "grad_norm": 1.0927772521972656, + "learning_rate": 2.1548995881968197e-05, + "loss": 0.19677734375, + "step": 5600 + }, + { + "epoch": 0.3785994322022442, + "grad_norm": 0.977857768535614, + "learning_rate": 2.15460272818798e-05, + "loss": 0.155731201171875, + "step": 5601 + }, + { + "epoch": 0.3786670271731783, + "grad_norm": 0.7261026501655579, + "learning_rate": 2.154305836505389e-05, + "loss": 0.145660400390625, + "step": 5602 + }, + { + "epoch": 0.37873462214411246, + "grad_norm": 0.6525740623474121, + "learning_rate": 2.1540089131634125e-05, + "loss": 0.1431884765625, + "step": 5603 + }, + { + "epoch": 0.37880221711504664, + "grad_norm": 0.6829203367233276, + "learning_rate": 2.1537119581764176e-05, + "loss": 0.14853668212890625, + "step": 5604 + }, + { + "epoch": 0.3788698120859808, + "grad_norm": 0.5925308465957642, + "learning_rate": 2.1534149715587727e-05, + "loss": 0.1630096435546875, + "step": 5605 + }, + { + "epoch": 0.378937407056915, + "grad_norm": 0.4360998570919037, + "learning_rate": 2.153117953324848e-05, + "loss": 0.0715179443359375, + "step": 5606 + }, + { + "epoch": 0.37900500202784915, + "grad_norm": 0.4496217668056488, + "learning_rate": 2.1528209034890154e-05, + "loss": 0.089447021484375, + "step": 5607 + }, + { + "epoch": 0.3790725969987833, + "grad_norm": 0.28387054800987244, + "learning_rate": 2.1525238220656473e-05, + "loss": 0.04630279541015625, + "step": 5608 + }, + { + "epoch": 0.37914019196971743, + "grad_norm": 0.36760860681533813, + "learning_rate": 2.1522267090691186e-05, + "loss": 0.08430862426757812, + "step": 5609 + }, + { + "epoch": 0.3792077869406516, + "grad_norm": 0.7647194862365723, + "learning_rate": 2.1519295645138055e-05, + "loss": 0.1506500244140625, + "step": 5610 + }, + { + "epoch": 0.3792753819115858, + "grad_norm": 0.26707884669303894, + "learning_rate": 2.1516323884140863e-05, + "loss": 0.04155731201171875, + "step": 5611 + }, + { + "epoch": 0.37934297688251994, + "grad_norm": 1.21089506149292, + "learning_rate": 2.1513351807843395e-05, + "loss": 0.2252197265625, + "step": 5612 + }, + { + "epoch": 0.3794105718534541, + "grad_norm": 0.458950936794281, + "learning_rate": 2.1510379416389453e-05, + "loss": 0.10662078857421875, + "step": 5613 + }, + { + "epoch": 0.3794781668243883, + "grad_norm": 0.22241881489753723, + "learning_rate": 2.1507406709922874e-05, + "loss": 0.038962364196777344, + "step": 5614 + }, + { + "epoch": 0.3795457617953224, + "grad_norm": 0.5933756232261658, + "learning_rate": 2.1504433688587493e-05, + "loss": 0.11908721923828125, + "step": 5615 + }, + { + "epoch": 0.37961335676625657, + "grad_norm": 0.3020838797092438, + "learning_rate": 2.1501460352527152e-05, + "loss": 0.047842979431152344, + "step": 5616 + }, + { + "epoch": 0.37968095173719074, + "grad_norm": 2.1538407802581787, + "learning_rate": 2.1498486701885726e-05, + "loss": 0.2744140625, + "step": 5617 + }, + { + "epoch": 0.3797485467081249, + "grad_norm": 0.6568519473075867, + "learning_rate": 2.1495512736807107e-05, + "loss": 0.08810043334960938, + "step": 5618 + }, + { + "epoch": 0.3798161416790591, + "grad_norm": 0.6895673274993896, + "learning_rate": 2.149253845743518e-05, + "loss": 0.1106414794921875, + "step": 5619 + }, + { + "epoch": 0.37988373664999325, + "grad_norm": 0.7423906326293945, + "learning_rate": 2.1489563863913874e-05, + "loss": 0.158721923828125, + "step": 5620 + }, + { + "epoch": 0.3799513316209274, + "grad_norm": 0.3143244981765747, + "learning_rate": 2.14865889563871e-05, + "loss": 0.028400421142578125, + "step": 5621 + }, + { + "epoch": 0.38001892659186154, + "grad_norm": 1.4486905336380005, + "learning_rate": 2.1483613734998818e-05, + "loss": 0.22589111328125, + "step": 5622 + }, + { + "epoch": 0.3800865215627957, + "grad_norm": 0.24873825907707214, + "learning_rate": 2.1480638199892982e-05, + "loss": 0.041412353515625, + "step": 5623 + }, + { + "epoch": 0.3801541165337299, + "grad_norm": 1.2073872089385986, + "learning_rate": 2.1477662351213567e-05, + "loss": 0.244110107421875, + "step": 5624 + }, + { + "epoch": 0.38022171150466405, + "grad_norm": 0.7615481615066528, + "learning_rate": 2.1474686189104566e-05, + "loss": 0.14467620849609375, + "step": 5625 + }, + { + "epoch": 0.3802893064755982, + "grad_norm": 0.9644521474838257, + "learning_rate": 2.1471709713709976e-05, + "loss": 0.1659698486328125, + "step": 5626 + }, + { + "epoch": 0.3803569014465324, + "grad_norm": 0.2241670936346054, + "learning_rate": 2.1468732925173825e-05, + "loss": 0.03467559814453125, + "step": 5627 + }, + { + "epoch": 0.38042449641746656, + "grad_norm": 0.3431559205055237, + "learning_rate": 2.146575582364015e-05, + "loss": 0.050891876220703125, + "step": 5628 + }, + { + "epoch": 0.3804920913884007, + "grad_norm": 1.113408088684082, + "learning_rate": 2.1462778409252995e-05, + "loss": 0.15643310546875, + "step": 5629 + }, + { + "epoch": 0.38055968635933485, + "grad_norm": 0.7920358777046204, + "learning_rate": 2.145980068215643e-05, + "loss": 0.24798583984375, + "step": 5630 + }, + { + "epoch": 0.380627281330269, + "grad_norm": 0.4339991807937622, + "learning_rate": 2.1456822642494532e-05, + "loss": 0.09288787841796875, + "step": 5631 + }, + { + "epoch": 0.3806948763012032, + "grad_norm": 0.8062430024147034, + "learning_rate": 2.1453844290411404e-05, + "loss": 0.1735076904296875, + "step": 5632 + }, + { + "epoch": 0.38076247127213736, + "grad_norm": 0.3315524160861969, + "learning_rate": 2.145086562605115e-05, + "loss": 0.048980712890625, + "step": 5633 + }, + { + "epoch": 0.38083006624307153, + "grad_norm": 0.27083197236061096, + "learning_rate": 2.1447886649557902e-05, + "loss": 0.0431060791015625, + "step": 5634 + }, + { + "epoch": 0.3808976612140057, + "grad_norm": 0.5517468452453613, + "learning_rate": 2.1444907361075794e-05, + "loss": 0.12127685546875, + "step": 5635 + }, + { + "epoch": 0.3809652561849398, + "grad_norm": 0.32901719212532043, + "learning_rate": 2.1441927760748994e-05, + "loss": 0.04984283447265625, + "step": 5636 + }, + { + "epoch": 0.381032851155874, + "grad_norm": 0.4409439265727997, + "learning_rate": 2.1438947848721664e-05, + "loss": 0.07049560546875, + "step": 5637 + }, + { + "epoch": 0.38110044612680816, + "grad_norm": 0.2666603624820709, + "learning_rate": 2.1435967625137992e-05, + "loss": 0.04785919189453125, + "step": 5638 + }, + { + "epoch": 0.3811680410977423, + "grad_norm": 0.3670293986797333, + "learning_rate": 2.143298709014219e-05, + "loss": 0.08309173583984375, + "step": 5639 + }, + { + "epoch": 0.3812356360686765, + "grad_norm": 0.40484896302223206, + "learning_rate": 2.1430006243878458e-05, + "loss": 0.056304931640625, + "step": 5640 + }, + { + "epoch": 0.38130323103961067, + "grad_norm": 1.0997010469436646, + "learning_rate": 2.1427025086491037e-05, + "loss": 0.189788818359375, + "step": 5641 + }, + { + "epoch": 0.38137082601054484, + "grad_norm": 0.7083362340927124, + "learning_rate": 2.142404361812417e-05, + "loss": 0.10482025146484375, + "step": 5642 + }, + { + "epoch": 0.38143842098147895, + "grad_norm": 1.2314457893371582, + "learning_rate": 2.142106183892213e-05, + "loss": 0.12713623046875, + "step": 5643 + }, + { + "epoch": 0.3815060159524131, + "grad_norm": 0.49962863326072693, + "learning_rate": 2.141807974902918e-05, + "loss": 0.09687042236328125, + "step": 5644 + }, + { + "epoch": 0.3815736109233473, + "grad_norm": 0.28956425189971924, + "learning_rate": 2.1415097348589622e-05, + "loss": 0.059844970703125, + "step": 5645 + }, + { + "epoch": 0.38164120589428147, + "grad_norm": 0.8193879723548889, + "learning_rate": 2.1412114637747755e-05, + "loss": 0.158966064453125, + "step": 5646 + }, + { + "epoch": 0.38170880086521564, + "grad_norm": 0.570428729057312, + "learning_rate": 2.1409131616647907e-05, + "loss": 0.1148681640625, + "step": 5647 + }, + { + "epoch": 0.3817763958361498, + "grad_norm": 0.4238043427467346, + "learning_rate": 2.1406148285434416e-05, + "loss": 0.0812835693359375, + "step": 5648 + }, + { + "epoch": 0.381843990807084, + "grad_norm": 0.38922950625419617, + "learning_rate": 2.140316464425163e-05, + "loss": 0.07757568359375, + "step": 5649 + }, + { + "epoch": 0.3819115857780181, + "grad_norm": 0.5058549642562866, + "learning_rate": 2.1400180693243915e-05, + "loss": 0.07748794555664062, + "step": 5650 + }, + { + "epoch": 0.38197918074895226, + "grad_norm": 1.2263740301132202, + "learning_rate": 2.1397196432555662e-05, + "loss": 0.1561737060546875, + "step": 5651 + }, + { + "epoch": 0.38204677571988643, + "grad_norm": 0.3429669141769409, + "learning_rate": 2.1394211862331256e-05, + "loss": 0.059902191162109375, + "step": 5652 + }, + { + "epoch": 0.3821143706908206, + "grad_norm": 0.7609924674034119, + "learning_rate": 2.139122698271512e-05, + "loss": 0.10947036743164062, + "step": 5653 + }, + { + "epoch": 0.3821819656617548, + "grad_norm": 0.5678032040596008, + "learning_rate": 2.138824179385167e-05, + "loss": 0.13909912109375, + "step": 5654 + }, + { + "epoch": 0.38224956063268895, + "grad_norm": 0.23799024522304535, + "learning_rate": 2.138525629588536e-05, + "loss": 0.05168914794921875, + "step": 5655 + }, + { + "epoch": 0.3823171556036231, + "grad_norm": 1.132759690284729, + "learning_rate": 2.1382270488960633e-05, + "loss": 0.216156005859375, + "step": 5656 + }, + { + "epoch": 0.38238475057455723, + "grad_norm": 0.2590712308883667, + "learning_rate": 2.1379284373221975e-05, + "loss": 0.054958343505859375, + "step": 5657 + }, + { + "epoch": 0.3824523455454914, + "grad_norm": 0.5986912846565247, + "learning_rate": 2.1376297948813865e-05, + "loss": 0.13214111328125, + "step": 5658 + }, + { + "epoch": 0.38251994051642557, + "grad_norm": 0.982063889503479, + "learning_rate": 2.1373311215880805e-05, + "loss": 0.164276123046875, + "step": 5659 + }, + { + "epoch": 0.38258753548735974, + "grad_norm": 0.4195707142353058, + "learning_rate": 2.1370324174567314e-05, + "loss": 0.0820770263671875, + "step": 5660 + }, + { + "epoch": 0.3826551304582939, + "grad_norm": 0.8954154849052429, + "learning_rate": 2.1367336825017927e-05, + "loss": 0.15331268310546875, + "step": 5661 + }, + { + "epoch": 0.3827227254292281, + "grad_norm": 0.9214529395103455, + "learning_rate": 2.1364349167377185e-05, + "loss": 0.1611785888671875, + "step": 5662 + }, + { + "epoch": 0.38279032040016225, + "grad_norm": 0.3715953230857849, + "learning_rate": 2.1361361201789647e-05, + "loss": 0.05898857116699219, + "step": 5663 + }, + { + "epoch": 0.38285791537109637, + "grad_norm": 0.310007244348526, + "learning_rate": 2.1358372928399893e-05, + "loss": 0.0633392333984375, + "step": 5664 + }, + { + "epoch": 0.38292551034203054, + "grad_norm": 0.14094655215740204, + "learning_rate": 2.1355384347352516e-05, + "loss": 0.030986785888671875, + "step": 5665 + }, + { + "epoch": 0.3829931053129647, + "grad_norm": 0.7178695201873779, + "learning_rate": 2.1352395458792125e-05, + "loss": 0.204498291015625, + "step": 5666 + }, + { + "epoch": 0.3830607002838989, + "grad_norm": 0.6907991766929626, + "learning_rate": 2.1349406262863332e-05, + "loss": 0.103515625, + "step": 5667 + }, + { + "epoch": 0.38312829525483305, + "grad_norm": 0.41661861538887024, + "learning_rate": 2.1346416759710776e-05, + "loss": 0.05384063720703125, + "step": 5668 + }, + { + "epoch": 0.3831958902257672, + "grad_norm": 0.4919425845146179, + "learning_rate": 2.1343426949479113e-05, + "loss": 0.1285858154296875, + "step": 5669 + }, + { + "epoch": 0.3832634851967014, + "grad_norm": 0.31266024708747864, + "learning_rate": 2.1340436832313004e-05, + "loss": 0.0852508544921875, + "step": 5670 + }, + { + "epoch": 0.3833310801676355, + "grad_norm": 0.416660338640213, + "learning_rate": 2.1337446408357128e-05, + "loss": 0.09966659545898438, + "step": 5671 + }, + { + "epoch": 0.3833986751385697, + "grad_norm": 0.5754364728927612, + "learning_rate": 2.1334455677756185e-05, + "loss": 0.1056060791015625, + "step": 5672 + }, + { + "epoch": 0.38346627010950385, + "grad_norm": 0.8266611695289612, + "learning_rate": 2.133146464065488e-05, + "loss": 0.1403350830078125, + "step": 5673 + }, + { + "epoch": 0.383533865080438, + "grad_norm": 0.4532550275325775, + "learning_rate": 2.1328473297197942e-05, + "loss": 0.08324813842773438, + "step": 5674 + }, + { + "epoch": 0.3836014600513722, + "grad_norm": 1.2020366191864014, + "learning_rate": 2.132548164753011e-05, + "loss": 0.2281494140625, + "step": 5675 + }, + { + "epoch": 0.38366905502230636, + "grad_norm": 0.5354365110397339, + "learning_rate": 2.132248969179614e-05, + "loss": 0.11678314208984375, + "step": 5676 + }, + { + "epoch": 0.38373664999324053, + "grad_norm": 0.7138845324516296, + "learning_rate": 2.13194974301408e-05, + "loss": 0.1427154541015625, + "step": 5677 + }, + { + "epoch": 0.38380424496417465, + "grad_norm": 0.14454098045825958, + "learning_rate": 2.131650486270887e-05, + "loss": 0.01804065704345703, + "step": 5678 + }, + { + "epoch": 0.3838718399351088, + "grad_norm": 0.16179747879505157, + "learning_rate": 2.1313511989645156e-05, + "loss": 0.0311431884765625, + "step": 5679 + }, + { + "epoch": 0.383939434906043, + "grad_norm": 0.7336875200271606, + "learning_rate": 2.1310518811094465e-05, + "loss": 0.13182830810546875, + "step": 5680 + }, + { + "epoch": 0.38400702987697716, + "grad_norm": 0.5321494936943054, + "learning_rate": 2.1307525327201632e-05, + "loss": 0.1113739013671875, + "step": 5681 + }, + { + "epoch": 0.38407462484791133, + "grad_norm": 0.1859537810087204, + "learning_rate": 2.1304531538111494e-05, + "loss": 0.03481101989746094, + "step": 5682 + }, + { + "epoch": 0.3841422198188455, + "grad_norm": 0.7322016954421997, + "learning_rate": 2.1301537443968918e-05, + "loss": 0.138824462890625, + "step": 5683 + }, + { + "epoch": 0.3842098147897796, + "grad_norm": 1.0690635442733765, + "learning_rate": 2.1298543044918764e-05, + "loss": 0.147552490234375, + "step": 5684 + }, + { + "epoch": 0.3842774097607138, + "grad_norm": 0.4189439117908478, + "learning_rate": 2.129554834110594e-05, + "loss": 0.0619049072265625, + "step": 5685 + }, + { + "epoch": 0.38434500473164795, + "grad_norm": 0.5505232214927673, + "learning_rate": 2.1292553332675326e-05, + "loss": 0.0875244140625, + "step": 5686 + }, + { + "epoch": 0.3844125997025821, + "grad_norm": 0.36591053009033203, + "learning_rate": 2.1289558019771852e-05, + "loss": 0.0729827880859375, + "step": 5687 + }, + { + "epoch": 0.3844801946735163, + "grad_norm": 0.7784761190414429, + "learning_rate": 2.128656240254044e-05, + "loss": 0.1404266357421875, + "step": 5688 + }, + { + "epoch": 0.38454778964445047, + "grad_norm": 0.4165715277194977, + "learning_rate": 2.1283566481126052e-05, + "loss": 0.0581512451171875, + "step": 5689 + }, + { + "epoch": 0.38461538461538464, + "grad_norm": 0.23421020805835724, + "learning_rate": 2.1280570255673638e-05, + "loss": 0.042705535888671875, + "step": 5690 + }, + { + "epoch": 0.38468297958631875, + "grad_norm": 0.7310831546783447, + "learning_rate": 2.1277573726328178e-05, + "loss": 0.13565826416015625, + "step": 5691 + }, + { + "epoch": 0.3847505745572529, + "grad_norm": 0.41021138429641724, + "learning_rate": 2.1274576893234656e-05, + "loss": 0.0850982666015625, + "step": 5692 + }, + { + "epoch": 0.3848181695281871, + "grad_norm": 1.1301860809326172, + "learning_rate": 2.1271579756538086e-05, + "loss": 0.226409912109375, + "step": 5693 + }, + { + "epoch": 0.38488576449912126, + "grad_norm": 0.5531219840049744, + "learning_rate": 2.1268582316383485e-05, + "loss": 0.08233642578125, + "step": 5694 + }, + { + "epoch": 0.38495335947005543, + "grad_norm": 0.9612519145011902, + "learning_rate": 2.126558457291589e-05, + "loss": 0.141937255859375, + "step": 5695 + }, + { + "epoch": 0.3850209544409896, + "grad_norm": 0.6883136630058289, + "learning_rate": 2.1262586526280344e-05, + "loss": 0.09954071044921875, + "step": 5696 + }, + { + "epoch": 0.3850885494119238, + "grad_norm": 2.8750298023223877, + "learning_rate": 2.1259588176621917e-05, + "loss": 0.256561279296875, + "step": 5697 + }, + { + "epoch": 0.3851561443828579, + "grad_norm": 0.2777596414089203, + "learning_rate": 2.1256589524085687e-05, + "loss": 0.06243896484375, + "step": 5698 + }, + { + "epoch": 0.38522373935379206, + "grad_norm": 0.8082590699195862, + "learning_rate": 2.1253590568816748e-05, + "loss": 0.223419189453125, + "step": 5699 + }, + { + "epoch": 0.38529133432472623, + "grad_norm": 1.1674472093582153, + "learning_rate": 2.1250591310960203e-05, + "loss": 0.1837615966796875, + "step": 5700 + }, + { + "epoch": 0.3853589292956604, + "grad_norm": 0.478867769241333, + "learning_rate": 2.1247591750661176e-05, + "loss": 0.1205596923828125, + "step": 5701 + }, + { + "epoch": 0.3854265242665946, + "grad_norm": 0.9935895204544067, + "learning_rate": 2.1244591888064814e-05, + "loss": 0.20135498046875, + "step": 5702 + }, + { + "epoch": 0.38549411923752874, + "grad_norm": 0.8488478064537048, + "learning_rate": 2.1241591723316256e-05, + "loss": 0.18646240234375, + "step": 5703 + }, + { + "epoch": 0.3855617142084629, + "grad_norm": 0.9790443778038025, + "learning_rate": 2.1238591256560678e-05, + "loss": 0.1724090576171875, + "step": 5704 + }, + { + "epoch": 0.38562930917939703, + "grad_norm": 0.3635800778865814, + "learning_rate": 2.1235590487943253e-05, + "loss": 0.07497406005859375, + "step": 5705 + }, + { + "epoch": 0.3856969041503312, + "grad_norm": 0.4026147723197937, + "learning_rate": 2.1232589417609185e-05, + "loss": 0.09020233154296875, + "step": 5706 + }, + { + "epoch": 0.38576449912126537, + "grad_norm": 0.24752704799175262, + "learning_rate": 2.122958804570368e-05, + "loss": 0.0573577880859375, + "step": 5707 + }, + { + "epoch": 0.38583209409219954, + "grad_norm": 0.6052045226097107, + "learning_rate": 2.1226586372371966e-05, + "loss": 0.1189422607421875, + "step": 5708 + }, + { + "epoch": 0.3858996890631337, + "grad_norm": 0.47875741124153137, + "learning_rate": 2.122358439775928e-05, + "loss": 0.106109619140625, + "step": 5709 + }, + { + "epoch": 0.3859672840340679, + "grad_norm": 1.0094038248062134, + "learning_rate": 2.1220582122010873e-05, + "loss": 0.13190460205078125, + "step": 5710 + }, + { + "epoch": 0.38603487900500205, + "grad_norm": 1.0093507766723633, + "learning_rate": 2.1217579545272023e-05, + "loss": 0.15879440307617188, + "step": 5711 + }, + { + "epoch": 0.38610247397593617, + "grad_norm": 0.7976500988006592, + "learning_rate": 2.1214576667688005e-05, + "loss": 0.197906494140625, + "step": 5712 + }, + { + "epoch": 0.38617006894687034, + "grad_norm": 0.7707148790359497, + "learning_rate": 2.1211573489404115e-05, + "loss": 0.1673583984375, + "step": 5713 + }, + { + "epoch": 0.3862376639178045, + "grad_norm": 0.2897168695926666, + "learning_rate": 2.120857001056568e-05, + "loss": 0.07012939453125, + "step": 5714 + }, + { + "epoch": 0.3863052588887387, + "grad_norm": 0.3101334273815155, + "learning_rate": 2.120556623131801e-05, + "loss": 0.04430389404296875, + "step": 5715 + }, + { + "epoch": 0.38637285385967285, + "grad_norm": 0.5273213982582092, + "learning_rate": 2.1202562151806456e-05, + "loss": 0.10668182373046875, + "step": 5716 + }, + { + "epoch": 0.386440448830607, + "grad_norm": 0.2275746911764145, + "learning_rate": 2.119955777217637e-05, + "loss": 0.0444183349609375, + "step": 5717 + }, + { + "epoch": 0.3865080438015412, + "grad_norm": 0.65022873878479, + "learning_rate": 2.1196553092573126e-05, + "loss": 0.114959716796875, + "step": 5718 + }, + { + "epoch": 0.3865756387724753, + "grad_norm": 0.43554025888442993, + "learning_rate": 2.119354811314211e-05, + "loss": 0.09819793701171875, + "step": 5719 + }, + { + "epoch": 0.3866432337434095, + "grad_norm": 0.6700936555862427, + "learning_rate": 2.119054283402872e-05, + "loss": 0.1588287353515625, + "step": 5720 + }, + { + "epoch": 0.38671082871434365, + "grad_norm": 0.4053064286708832, + "learning_rate": 2.118753725537836e-05, + "loss": 0.0810394287109375, + "step": 5721 + }, + { + "epoch": 0.3867784236852778, + "grad_norm": 0.8114559650421143, + "learning_rate": 2.1184531377336476e-05, + "loss": 0.15656661987304688, + "step": 5722 + }, + { + "epoch": 0.386846018656212, + "grad_norm": 0.43982672691345215, + "learning_rate": 2.11815252000485e-05, + "loss": 0.08826446533203125, + "step": 5723 + }, + { + "epoch": 0.38691361362714616, + "grad_norm": 0.542712390422821, + "learning_rate": 2.117851872365989e-05, + "loss": 0.08377838134765625, + "step": 5724 + }, + { + "epoch": 0.38698120859808033, + "grad_norm": 0.5156736969947815, + "learning_rate": 2.1175511948316127e-05, + "loss": 0.105987548828125, + "step": 5725 + }, + { + "epoch": 0.38704880356901444, + "grad_norm": 1.1033551692962646, + "learning_rate": 2.1172504874162683e-05, + "loss": 0.15711212158203125, + "step": 5726 + }, + { + "epoch": 0.3871163985399486, + "grad_norm": 0.2069946527481079, + "learning_rate": 2.1169497501345072e-05, + "loss": 0.03540802001953125, + "step": 5727 + }, + { + "epoch": 0.3871839935108828, + "grad_norm": 0.4331667721271515, + "learning_rate": 2.1166489830008803e-05, + "loss": 0.08319091796875, + "step": 5728 + }, + { + "epoch": 0.38725158848181696, + "grad_norm": 0.23463180661201477, + "learning_rate": 2.1163481860299407e-05, + "loss": 0.052967071533203125, + "step": 5729 + }, + { + "epoch": 0.3873191834527511, + "grad_norm": 0.37168094515800476, + "learning_rate": 2.1160473592362423e-05, + "loss": 0.06697845458984375, + "step": 5730 + }, + { + "epoch": 0.3873867784236853, + "grad_norm": 0.4524354338645935, + "learning_rate": 2.1157465026343422e-05, + "loss": 0.06756591796875, + "step": 5731 + }, + { + "epoch": 0.38745437339461947, + "grad_norm": 0.4973910450935364, + "learning_rate": 2.115445616238797e-05, + "loss": 0.10890960693359375, + "step": 5732 + }, + { + "epoch": 0.3875219683655536, + "grad_norm": 0.9528948068618774, + "learning_rate": 2.115144700064166e-05, + "loss": 0.1661376953125, + "step": 5733 + }, + { + "epoch": 0.38758956333648775, + "grad_norm": 0.5637428760528564, + "learning_rate": 2.1148437541250072e-05, + "loss": 0.1164093017578125, + "step": 5734 + }, + { + "epoch": 0.3876571583074219, + "grad_norm": 0.4730967879295349, + "learning_rate": 2.114542778435885e-05, + "loss": 0.1300811767578125, + "step": 5735 + }, + { + "epoch": 0.3877247532783561, + "grad_norm": 0.6370868682861328, + "learning_rate": 2.1142417730113614e-05, + "loss": 0.1506500244140625, + "step": 5736 + }, + { + "epoch": 0.38779234824929026, + "grad_norm": 0.9339758157730103, + "learning_rate": 2.113940737866001e-05, + "loss": 0.19561767578125, + "step": 5737 + }, + { + "epoch": 0.38785994322022443, + "grad_norm": 0.4390036463737488, + "learning_rate": 2.1136396730143687e-05, + "loss": 0.0937652587890625, + "step": 5738 + }, + { + "epoch": 0.3879275381911586, + "grad_norm": 0.5026559233665466, + "learning_rate": 2.113338578471034e-05, + "loss": 0.1031789779663086, + "step": 5739 + }, + { + "epoch": 0.3879951331620927, + "grad_norm": 0.816297173500061, + "learning_rate": 2.113037454250564e-05, + "loss": 0.1444854736328125, + "step": 5740 + }, + { + "epoch": 0.3880627281330269, + "grad_norm": 0.527790904045105, + "learning_rate": 2.11273630036753e-05, + "loss": 0.093902587890625, + "step": 5741 + }, + { + "epoch": 0.38813032310396106, + "grad_norm": 0.2633693516254425, + "learning_rate": 2.1124351168365027e-05, + "loss": 0.06235504150390625, + "step": 5742 + }, + { + "epoch": 0.38819791807489523, + "grad_norm": 0.2184690237045288, + "learning_rate": 2.1121339036720553e-05, + "loss": 0.04557037353515625, + "step": 5743 + }, + { + "epoch": 0.3882655130458294, + "grad_norm": 0.3307032585144043, + "learning_rate": 2.1118326608887637e-05, + "loss": 0.03528022766113281, + "step": 5744 + }, + { + "epoch": 0.3883331080167636, + "grad_norm": 0.788033664226532, + "learning_rate": 2.1115313885012027e-05, + "loss": 0.15789794921875, + "step": 5745 + }, + { + "epoch": 0.3884007029876977, + "grad_norm": 0.37303033471107483, + "learning_rate": 2.11123008652395e-05, + "loss": 0.062530517578125, + "step": 5746 + }, + { + "epoch": 0.38846829795863186, + "grad_norm": 1.4032487869262695, + "learning_rate": 2.1109287549715847e-05, + "loss": 0.251068115234375, + "step": 5747 + }, + { + "epoch": 0.38853589292956603, + "grad_norm": 0.5486981272697449, + "learning_rate": 2.110627393858687e-05, + "loss": 0.112457275390625, + "step": 5748 + }, + { + "epoch": 0.3886034879005002, + "grad_norm": 0.5866096019744873, + "learning_rate": 2.1103260031998383e-05, + "loss": 0.1590423583984375, + "step": 5749 + }, + { + "epoch": 0.38867108287143437, + "grad_norm": 0.9891741275787354, + "learning_rate": 2.110024583009622e-05, + "loss": 0.12810134887695312, + "step": 5750 + }, + { + "epoch": 0.38873867784236854, + "grad_norm": 1.3559300899505615, + "learning_rate": 2.1097231333026223e-05, + "loss": 0.2338409423828125, + "step": 5751 + }, + { + "epoch": 0.3888062728133027, + "grad_norm": 0.749700129032135, + "learning_rate": 2.109421654093426e-05, + "loss": 0.22418212890625, + "step": 5752 + }, + { + "epoch": 0.3888738677842368, + "grad_norm": 1.1489567756652832, + "learning_rate": 2.1091201453966202e-05, + "loss": 0.1569061279296875, + "step": 5753 + }, + { + "epoch": 0.388941462755171, + "grad_norm": 0.21743252873420715, + "learning_rate": 2.1088186072267937e-05, + "loss": 0.0338287353515625, + "step": 5754 + }, + { + "epoch": 0.38900905772610517, + "grad_norm": 1.0426470041275024, + "learning_rate": 2.1085170395985366e-05, + "loss": 0.1595611572265625, + "step": 5755 + }, + { + "epoch": 0.38907665269703934, + "grad_norm": 1.2176138162612915, + "learning_rate": 2.1082154425264408e-05, + "loss": 0.211456298828125, + "step": 5756 + }, + { + "epoch": 0.3891442476679735, + "grad_norm": 0.6264159083366394, + "learning_rate": 2.1079138160250997e-05, + "loss": 0.14141845703125, + "step": 5757 + }, + { + "epoch": 0.3892118426389077, + "grad_norm": 0.5163393020629883, + "learning_rate": 2.1076121601091073e-05, + "loss": 0.08774566650390625, + "step": 5758 + }, + { + "epoch": 0.38927943760984185, + "grad_norm": 1.390884280204773, + "learning_rate": 2.10731047479306e-05, + "loss": 0.260345458984375, + "step": 5759 + }, + { + "epoch": 0.38934703258077596, + "grad_norm": 0.5560147762298584, + "learning_rate": 2.107008760091555e-05, + "loss": 0.0725860595703125, + "step": 5760 + }, + { + "epoch": 0.38941462755171014, + "grad_norm": 0.6897339224815369, + "learning_rate": 2.1067070160191916e-05, + "loss": 0.09833145141601562, + "step": 5761 + }, + { + "epoch": 0.3894822225226443, + "grad_norm": 0.7981961965560913, + "learning_rate": 2.1064052425905695e-05, + "loss": 0.11679840087890625, + "step": 5762 + }, + { + "epoch": 0.3895498174935785, + "grad_norm": 1.9956544637680054, + "learning_rate": 2.1061034398202904e-05, + "loss": 0.239776611328125, + "step": 5763 + }, + { + "epoch": 0.38961741246451265, + "grad_norm": 0.467504620552063, + "learning_rate": 2.1058016077229575e-05, + "loss": 0.084991455078125, + "step": 5764 + }, + { + "epoch": 0.3896850074354468, + "grad_norm": 0.8014322519302368, + "learning_rate": 2.1054997463131753e-05, + "loss": 0.1915283203125, + "step": 5765 + }, + { + "epoch": 0.389752602406381, + "grad_norm": 1.151593565940857, + "learning_rate": 2.1051978556055504e-05, + "loss": 0.243560791015625, + "step": 5766 + }, + { + "epoch": 0.3898201973773151, + "grad_norm": 0.4659939408302307, + "learning_rate": 2.104895935614689e-05, + "loss": 0.0703887939453125, + "step": 5767 + }, + { + "epoch": 0.3898877923482493, + "grad_norm": 0.1688755750656128, + "learning_rate": 2.104593986355201e-05, + "loss": 0.0322723388671875, + "step": 5768 + }, + { + "epoch": 0.38995538731918344, + "grad_norm": 0.33965182304382324, + "learning_rate": 2.1042920078416958e-05, + "loss": 0.046356201171875, + "step": 5769 + }, + { + "epoch": 0.3900229822901176, + "grad_norm": 0.38624268770217896, + "learning_rate": 2.103990000088785e-05, + "loss": 0.06976318359375, + "step": 5770 + }, + { + "epoch": 0.3900905772610518, + "grad_norm": 0.4672416150569916, + "learning_rate": 2.103687963111082e-05, + "loss": 0.09814453125, + "step": 5771 + }, + { + "epoch": 0.39015817223198596, + "grad_norm": 0.6991533041000366, + "learning_rate": 2.1033858969232006e-05, + "loss": 0.12300872802734375, + "step": 5772 + }, + { + "epoch": 0.3902257672029201, + "grad_norm": 1.141791820526123, + "learning_rate": 2.1030838015397574e-05, + "loss": 0.2174072265625, + "step": 5773 + }, + { + "epoch": 0.39029336217385424, + "grad_norm": 1.2011356353759766, + "learning_rate": 2.1027816769753692e-05, + "loss": 0.1868743896484375, + "step": 5774 + }, + { + "epoch": 0.3903609571447884, + "grad_norm": 0.8747810125350952, + "learning_rate": 2.1024795232446554e-05, + "loss": 0.127777099609375, + "step": 5775 + }, + { + "epoch": 0.3904285521157226, + "grad_norm": 0.5538730025291443, + "learning_rate": 2.102177340362235e-05, + "loss": 0.11248779296875, + "step": 5776 + }, + { + "epoch": 0.39049614708665675, + "grad_norm": 1.0123108625411987, + "learning_rate": 2.10187512834273e-05, + "loss": 0.14178466796875, + "step": 5777 + }, + { + "epoch": 0.3905637420575909, + "grad_norm": 0.2157827615737915, + "learning_rate": 2.1015728872007635e-05, + "loss": 0.04291534423828125, + "step": 5778 + }, + { + "epoch": 0.3906313370285251, + "grad_norm": 0.31960010528564453, + "learning_rate": 2.101270616950959e-05, + "loss": 0.065338134765625, + "step": 5779 + }, + { + "epoch": 0.39069893199945926, + "grad_norm": 0.582244336605072, + "learning_rate": 2.1009683176079434e-05, + "loss": 0.09339141845703125, + "step": 5780 + }, + { + "epoch": 0.3907665269703934, + "grad_norm": 2.260935068130493, + "learning_rate": 2.100665989186343e-05, + "loss": 0.28485107421875, + "step": 5781 + }, + { + "epoch": 0.39083412194132755, + "grad_norm": 1.1576697826385498, + "learning_rate": 2.1003636317007862e-05, + "loss": 0.203857421875, + "step": 5782 + }, + { + "epoch": 0.3909017169122617, + "grad_norm": 0.23352423310279846, + "learning_rate": 2.100061245165904e-05, + "loss": 0.031940460205078125, + "step": 5783 + }, + { + "epoch": 0.3909693118831959, + "grad_norm": 0.40500637888908386, + "learning_rate": 2.0997588295963264e-05, + "loss": 0.06927490234375, + "step": 5784 + }, + { + "epoch": 0.39103690685413006, + "grad_norm": 1.176101565361023, + "learning_rate": 2.099456385006687e-05, + "loss": 0.19122314453125, + "step": 5785 + }, + { + "epoch": 0.39110450182506423, + "grad_norm": 0.7568636536598206, + "learning_rate": 2.0991539114116196e-05, + "loss": 0.15850830078125, + "step": 5786 + }, + { + "epoch": 0.3911720967959984, + "grad_norm": 0.8180675506591797, + "learning_rate": 2.09885140882576e-05, + "loss": 0.192230224609375, + "step": 5787 + }, + { + "epoch": 0.3912396917669325, + "grad_norm": 0.42293480038642883, + "learning_rate": 2.0985488772637452e-05, + "loss": 0.1018524169921875, + "step": 5788 + }, + { + "epoch": 0.3913072867378667, + "grad_norm": 0.6711530685424805, + "learning_rate": 2.098246316740213e-05, + "loss": 0.14101409912109375, + "step": 5789 + }, + { + "epoch": 0.39137488170880086, + "grad_norm": 0.9649808406829834, + "learning_rate": 2.0979437272698038e-05, + "loss": 0.150115966796875, + "step": 5790 + }, + { + "epoch": 0.39144247667973503, + "grad_norm": 1.2335801124572754, + "learning_rate": 2.0976411088671584e-05, + "loss": 0.172332763671875, + "step": 5791 + }, + { + "epoch": 0.3915100716506692, + "grad_norm": 0.34155789017677307, + "learning_rate": 2.0973384615469197e-05, + "loss": 0.0487823486328125, + "step": 5792 + }, + { + "epoch": 0.39157766662160337, + "grad_norm": 0.6047626733779907, + "learning_rate": 2.0970357853237312e-05, + "loss": 0.11956787109375, + "step": 5793 + }, + { + "epoch": 0.39164526159253754, + "grad_norm": 0.5581410527229309, + "learning_rate": 2.0967330802122383e-05, + "loss": 0.07195186614990234, + "step": 5794 + }, + { + "epoch": 0.39171285656347166, + "grad_norm": 0.8459596037864685, + "learning_rate": 2.096430346227088e-05, + "loss": 0.1781005859375, + "step": 5795 + }, + { + "epoch": 0.3917804515344058, + "grad_norm": 0.817590594291687, + "learning_rate": 2.0961275833829286e-05, + "loss": 0.1820831298828125, + "step": 5796 + }, + { + "epoch": 0.39184804650534, + "grad_norm": 0.3885725736618042, + "learning_rate": 2.0958247916944093e-05, + "loss": 0.072723388671875, + "step": 5797 + }, + { + "epoch": 0.39191564147627417, + "grad_norm": 0.46952468156814575, + "learning_rate": 2.0955219711761814e-05, + "loss": 0.08557891845703125, + "step": 5798 + }, + { + "epoch": 0.39198323644720834, + "grad_norm": 0.2804965376853943, + "learning_rate": 2.0952191218428968e-05, + "loss": 0.047088623046875, + "step": 5799 + }, + { + "epoch": 0.3920508314181425, + "grad_norm": 0.6169939637184143, + "learning_rate": 2.0949162437092096e-05, + "loss": 0.08965682983398438, + "step": 5800 + }, + { + "epoch": 0.3921184263890767, + "grad_norm": 0.497262179851532, + "learning_rate": 2.094613336789775e-05, + "loss": 0.0890045166015625, + "step": 5801 + }, + { + "epoch": 0.3921860213600108, + "grad_norm": 0.6942182183265686, + "learning_rate": 2.094310401099249e-05, + "loss": 0.1251373291015625, + "step": 5802 + }, + { + "epoch": 0.39225361633094497, + "grad_norm": 0.20666851103305817, + "learning_rate": 2.0940074366522898e-05, + "loss": 0.038829803466796875, + "step": 5803 + }, + { + "epoch": 0.39232121130187914, + "grad_norm": 0.2451309859752655, + "learning_rate": 2.0937044434635567e-05, + "loss": 0.0391693115234375, + "step": 5804 + }, + { + "epoch": 0.3923888062728133, + "grad_norm": 0.22770319879055023, + "learning_rate": 2.0934014215477103e-05, + "loss": 0.035312652587890625, + "step": 5805 + }, + { + "epoch": 0.3924564012437475, + "grad_norm": 0.6341089010238647, + "learning_rate": 2.093098370919413e-05, + "loss": 0.10097503662109375, + "step": 5806 + }, + { + "epoch": 0.39252399621468165, + "grad_norm": 0.28136882185935974, + "learning_rate": 2.0927952915933283e-05, + "loss": 0.04241180419921875, + "step": 5807 + }, + { + "epoch": 0.39259159118561576, + "grad_norm": 1.0399372577667236, + "learning_rate": 2.0924921835841204e-05, + "loss": 0.19158935546875, + "step": 5808 + }, + { + "epoch": 0.39265918615654993, + "grad_norm": 0.8090241551399231, + "learning_rate": 2.0921890469064562e-05, + "loss": 0.1224212646484375, + "step": 5809 + }, + { + "epoch": 0.3927267811274841, + "grad_norm": 0.18518364429473877, + "learning_rate": 2.0918858815750027e-05, + "loss": 0.020814895629882812, + "step": 5810 + }, + { + "epoch": 0.3927943760984183, + "grad_norm": 0.8061296939849854, + "learning_rate": 2.09158268760443e-05, + "loss": 0.10282135009765625, + "step": 5811 + }, + { + "epoch": 0.39286197106935244, + "grad_norm": 0.3633457124233246, + "learning_rate": 2.0912794650094075e-05, + "loss": 0.06379508972167969, + "step": 5812 + }, + { + "epoch": 0.3929295660402866, + "grad_norm": 1.2620184421539307, + "learning_rate": 2.0909762138046076e-05, + "loss": 0.15277862548828125, + "step": 5813 + }, + { + "epoch": 0.3929971610112208, + "grad_norm": 0.617826521396637, + "learning_rate": 2.0906729340047032e-05, + "loss": 0.0985107421875, + "step": 5814 + }, + { + "epoch": 0.3930647559821549, + "grad_norm": 0.6700730323791504, + "learning_rate": 2.090369625624369e-05, + "loss": 0.11384010314941406, + "step": 5815 + }, + { + "epoch": 0.39313235095308907, + "grad_norm": 0.9423664212226868, + "learning_rate": 2.0900662886782805e-05, + "loss": 0.155487060546875, + "step": 5816 + }, + { + "epoch": 0.39319994592402324, + "grad_norm": 0.9582924842834473, + "learning_rate": 2.0897629231811156e-05, + "loss": 0.11346054077148438, + "step": 5817 + }, + { + "epoch": 0.3932675408949574, + "grad_norm": 0.8420773148536682, + "learning_rate": 2.0894595291475524e-05, + "loss": 0.1488800048828125, + "step": 5818 + }, + { + "epoch": 0.3933351358658916, + "grad_norm": 0.44384193420410156, + "learning_rate": 2.0891561065922716e-05, + "loss": 0.08751678466796875, + "step": 5819 + }, + { + "epoch": 0.39340273083682575, + "grad_norm": 0.8865960240364075, + "learning_rate": 2.0888526555299546e-05, + "loss": 0.1708984375, + "step": 5820 + }, + { + "epoch": 0.3934703258077599, + "grad_norm": 0.32465308904647827, + "learning_rate": 2.088549175975284e-05, + "loss": 0.04598236083984375, + "step": 5821 + }, + { + "epoch": 0.39353792077869404, + "grad_norm": 0.41310620307922363, + "learning_rate": 2.088245667942944e-05, + "loss": 0.08309173583984375, + "step": 5822 + }, + { + "epoch": 0.3936055157496282, + "grad_norm": 0.147141695022583, + "learning_rate": 2.0879421314476204e-05, + "loss": 0.021512985229492188, + "step": 5823 + }, + { + "epoch": 0.3936731107205624, + "grad_norm": 0.36047542095184326, + "learning_rate": 2.087638566504e-05, + "loss": 0.08766937255859375, + "step": 5824 + }, + { + "epoch": 0.39374070569149655, + "grad_norm": 0.602063775062561, + "learning_rate": 2.087334973126772e-05, + "loss": 0.10772705078125, + "step": 5825 + }, + { + "epoch": 0.3938083006624307, + "grad_norm": 0.611430287361145, + "learning_rate": 2.0870313513306243e-05, + "loss": 0.0716094970703125, + "step": 5826 + }, + { + "epoch": 0.3938758956333649, + "grad_norm": 0.34607386589050293, + "learning_rate": 2.0867277011302496e-05, + "loss": 0.06492233276367188, + "step": 5827 + }, + { + "epoch": 0.39394349060429906, + "grad_norm": 0.8009745478630066, + "learning_rate": 2.0864240225403404e-05, + "loss": 0.114105224609375, + "step": 5828 + }, + { + "epoch": 0.3940110855752332, + "grad_norm": 0.38598379492759705, + "learning_rate": 2.0861203155755892e-05, + "loss": 0.064208984375, + "step": 5829 + }, + { + "epoch": 0.39407868054616735, + "grad_norm": 1.077879548072815, + "learning_rate": 2.0858165802506926e-05, + "loss": 0.1373291015625, + "step": 5830 + }, + { + "epoch": 0.3941462755171015, + "grad_norm": 0.5364583134651184, + "learning_rate": 2.0855128165803467e-05, + "loss": 0.060272216796875, + "step": 5831 + }, + { + "epoch": 0.3942138704880357, + "grad_norm": 0.7797439694404602, + "learning_rate": 2.085209024579249e-05, + "loss": 0.155914306640625, + "step": 5832 + }, + { + "epoch": 0.39428146545896986, + "grad_norm": 1.0543339252471924, + "learning_rate": 2.0849052042621e-05, + "loss": 0.2120513916015625, + "step": 5833 + }, + { + "epoch": 0.39434906042990403, + "grad_norm": 0.7612540125846863, + "learning_rate": 2.0846013556435993e-05, + "loss": 0.12322998046875, + "step": 5834 + }, + { + "epoch": 0.3944166554008382, + "grad_norm": 0.5140327215194702, + "learning_rate": 2.084297478738449e-05, + "loss": 0.07939529418945312, + "step": 5835 + }, + { + "epoch": 0.3944842503717723, + "grad_norm": 0.4445561170578003, + "learning_rate": 2.0839935735613538e-05, + "loss": 0.09479141235351562, + "step": 5836 + }, + { + "epoch": 0.3945518453427065, + "grad_norm": 0.6699254512786865, + "learning_rate": 2.0836896401270176e-05, + "loss": 0.155731201171875, + "step": 5837 + }, + { + "epoch": 0.39461944031364066, + "grad_norm": 0.9004197716712952, + "learning_rate": 2.0833856784501465e-05, + "loss": 0.1631011962890625, + "step": 5838 + }, + { + "epoch": 0.3946870352845748, + "grad_norm": 0.49825024604797363, + "learning_rate": 2.0830816885454478e-05, + "loss": 0.1078338623046875, + "step": 5839 + }, + { + "epoch": 0.394754630255509, + "grad_norm": 0.15617726743221283, + "learning_rate": 2.0827776704276313e-05, + "loss": 0.023618698120117188, + "step": 5840 + }, + { + "epoch": 0.39482222522644317, + "grad_norm": 0.7269341349601746, + "learning_rate": 2.082473624111407e-05, + "loss": 0.12108993530273438, + "step": 5841 + }, + { + "epoch": 0.39488982019737734, + "grad_norm": 0.5659052133560181, + "learning_rate": 2.0821695496114862e-05, + "loss": 0.11883544921875, + "step": 5842 + }, + { + "epoch": 0.39495741516831145, + "grad_norm": 1.0369285345077515, + "learning_rate": 2.081865446942582e-05, + "loss": 0.173309326171875, + "step": 5843 + }, + { + "epoch": 0.3950250101392456, + "grad_norm": 1.1653491258621216, + "learning_rate": 2.0815613161194087e-05, + "loss": 0.2242431640625, + "step": 5844 + }, + { + "epoch": 0.3950926051101798, + "grad_norm": 1.455824851989746, + "learning_rate": 2.081257157156683e-05, + "loss": 0.18119049072265625, + "step": 5845 + }, + { + "epoch": 0.39516020008111397, + "grad_norm": 0.2638508379459381, + "learning_rate": 2.0809529700691208e-05, + "loss": 0.054058074951171875, + "step": 5846 + }, + { + "epoch": 0.39522779505204814, + "grad_norm": 0.36686187982559204, + "learning_rate": 2.0806487548714407e-05, + "loss": 0.0849761962890625, + "step": 5847 + }, + { + "epoch": 0.3952953900229823, + "grad_norm": 1.1161493062973022, + "learning_rate": 2.080344511578363e-05, + "loss": 0.1374664306640625, + "step": 5848 + }, + { + "epoch": 0.3953629849939165, + "grad_norm": 0.5512386560440063, + "learning_rate": 2.0800402402046093e-05, + "loss": 0.0760650634765625, + "step": 5849 + }, + { + "epoch": 0.3954305799648506, + "grad_norm": 0.5978230237960815, + "learning_rate": 2.0797359407649012e-05, + "loss": 0.09576797485351562, + "step": 5850 + }, + { + "epoch": 0.39549817493578476, + "grad_norm": 1.236985206604004, + "learning_rate": 2.0794316132739623e-05, + "loss": 0.192657470703125, + "step": 5851 + }, + { + "epoch": 0.39556576990671893, + "grad_norm": 0.6702768802642822, + "learning_rate": 2.079127257746519e-05, + "loss": 0.054782867431640625, + "step": 5852 + }, + { + "epoch": 0.3956333648776531, + "grad_norm": 0.5850512981414795, + "learning_rate": 2.0788228741972976e-05, + "loss": 0.09914016723632812, + "step": 5853 + }, + { + "epoch": 0.3957009598485873, + "grad_norm": 0.3065279722213745, + "learning_rate": 2.0785184626410255e-05, + "loss": 0.06171417236328125, + "step": 5854 + }, + { + "epoch": 0.39576855481952145, + "grad_norm": 0.2753181457519531, + "learning_rate": 2.078214023092433e-05, + "loss": 0.041684627532958984, + "step": 5855 + }, + { + "epoch": 0.3958361497904556, + "grad_norm": 0.5960325598716736, + "learning_rate": 2.0779095555662492e-05, + "loss": 0.10630416870117188, + "step": 5856 + }, + { + "epoch": 0.39590374476138973, + "grad_norm": 1.10495924949646, + "learning_rate": 2.077605060077208e-05, + "loss": 0.12664031982421875, + "step": 5857 + }, + { + "epoch": 0.3959713397323239, + "grad_norm": 1.2199043035507202, + "learning_rate": 2.0773005366400415e-05, + "loss": 0.180419921875, + "step": 5858 + }, + { + "epoch": 0.39603893470325807, + "grad_norm": 0.6193292737007141, + "learning_rate": 2.076995985269485e-05, + "loss": 0.09993743896484375, + "step": 5859 + }, + { + "epoch": 0.39610652967419224, + "grad_norm": 0.6619328856468201, + "learning_rate": 2.0766914059802746e-05, + "loss": 0.145233154296875, + "step": 5860 + }, + { + "epoch": 0.3961741246451264, + "grad_norm": 0.8755393624305725, + "learning_rate": 2.0763867987871476e-05, + "loss": 0.1768035888671875, + "step": 5861 + }, + { + "epoch": 0.3962417196160606, + "grad_norm": 1.2012453079223633, + "learning_rate": 2.0760821637048425e-05, + "loss": 0.20208740234375, + "step": 5862 + }, + { + "epoch": 0.39630931458699475, + "grad_norm": 1.1773486137390137, + "learning_rate": 2.0757775007480996e-05, + "loss": 0.12763214111328125, + "step": 5863 + }, + { + "epoch": 0.39637690955792887, + "grad_norm": 1.428802251815796, + "learning_rate": 2.0754728099316605e-05, + "loss": 0.196746826171875, + "step": 5864 + }, + { + "epoch": 0.39644450452886304, + "grad_norm": 1.1005290746688843, + "learning_rate": 2.075168091270268e-05, + "loss": 0.2451171875, + "step": 5865 + }, + { + "epoch": 0.3965120994997972, + "grad_norm": 0.8197355270385742, + "learning_rate": 2.0748633447786668e-05, + "loss": 0.12200164794921875, + "step": 5866 + }, + { + "epoch": 0.3965796944707314, + "grad_norm": 0.29262205958366394, + "learning_rate": 2.074558570471602e-05, + "loss": 0.06955718994140625, + "step": 5867 + }, + { + "epoch": 0.39664728944166555, + "grad_norm": 1.469201922416687, + "learning_rate": 2.0742537683638196e-05, + "loss": 0.16786956787109375, + "step": 5868 + }, + { + "epoch": 0.3967148844125997, + "grad_norm": 0.5390289425849915, + "learning_rate": 2.073948938470069e-05, + "loss": 0.10408782958984375, + "step": 5869 + }, + { + "epoch": 0.3967824793835339, + "grad_norm": 0.41751617193222046, + "learning_rate": 2.0736440808050996e-05, + "loss": 0.06377410888671875, + "step": 5870 + }, + { + "epoch": 0.396850074354468, + "grad_norm": 0.3297070860862732, + "learning_rate": 2.073339195383662e-05, + "loss": 0.0579071044921875, + "step": 5871 + }, + { + "epoch": 0.3969176693254022, + "grad_norm": 0.8786943554878235, + "learning_rate": 2.0730342822205085e-05, + "loss": 0.18310546875, + "step": 5872 + }, + { + "epoch": 0.39698526429633635, + "grad_norm": 0.187211275100708, + "learning_rate": 2.072729341330393e-05, + "loss": 0.04048919677734375, + "step": 5873 + }, + { + "epoch": 0.3970528592672705, + "grad_norm": 0.4699883759021759, + "learning_rate": 2.07242437272807e-05, + "loss": 0.08576202392578125, + "step": 5874 + }, + { + "epoch": 0.3971204542382047, + "grad_norm": 1.0066059827804565, + "learning_rate": 2.0721193764282963e-05, + "loss": 0.1795501708984375, + "step": 5875 + }, + { + "epoch": 0.39718804920913886, + "grad_norm": 0.4680823087692261, + "learning_rate": 2.0718143524458286e-05, + "loss": 0.080230712890625, + "step": 5876 + }, + { + "epoch": 0.397255644180073, + "grad_norm": 0.24660447239875793, + "learning_rate": 2.0715093007954268e-05, + "loss": 0.038254737854003906, + "step": 5877 + }, + { + "epoch": 0.39732323915100715, + "grad_norm": 0.3704296052455902, + "learning_rate": 2.071204221491851e-05, + "loss": 0.073516845703125, + "step": 5878 + }, + { + "epoch": 0.3973908341219413, + "grad_norm": 1.016296148300171, + "learning_rate": 2.0708991145498627e-05, + "loss": 0.10335159301757812, + "step": 5879 + }, + { + "epoch": 0.3974584290928755, + "grad_norm": 0.2820562422275543, + "learning_rate": 2.0705939799842246e-05, + "loss": 0.0460357666015625, + "step": 5880 + }, + { + "epoch": 0.39752602406380966, + "grad_norm": 0.26154783368110657, + "learning_rate": 2.0702888178097007e-05, + "loss": 0.057281494140625, + "step": 5881 + }, + { + "epoch": 0.39759361903474383, + "grad_norm": 0.7151461243629456, + "learning_rate": 2.069983628041058e-05, + "loss": 0.132965087890625, + "step": 5882 + }, + { + "epoch": 0.397661214005678, + "grad_norm": 0.7586688995361328, + "learning_rate": 2.0696784106930626e-05, + "loss": 0.147918701171875, + "step": 5883 + }, + { + "epoch": 0.3977288089766121, + "grad_norm": 0.7368488311767578, + "learning_rate": 2.069373165780483e-05, + "loss": 0.13446044921875, + "step": 5884 + }, + { + "epoch": 0.3977964039475463, + "grad_norm": 0.6672552227973938, + "learning_rate": 2.0690678933180885e-05, + "loss": 0.12408447265625, + "step": 5885 + }, + { + "epoch": 0.39786399891848045, + "grad_norm": 0.5744823217391968, + "learning_rate": 2.0687625933206505e-05, + "loss": 0.0983123779296875, + "step": 5886 + }, + { + "epoch": 0.3979315938894146, + "grad_norm": 0.999674916267395, + "learning_rate": 2.068457265802941e-05, + "loss": 0.16317367553710938, + "step": 5887 + }, + { + "epoch": 0.3979991888603488, + "grad_norm": 0.5223830342292786, + "learning_rate": 2.068151910779734e-05, + "loss": 0.08010101318359375, + "step": 5888 + }, + { + "epoch": 0.39806678383128297, + "grad_norm": 0.8442199230194092, + "learning_rate": 2.0678465282658038e-05, + "loss": 0.17578125, + "step": 5889 + }, + { + "epoch": 0.39813437880221714, + "grad_norm": 0.35463201999664307, + "learning_rate": 2.0675411182759273e-05, + "loss": 0.07802200317382812, + "step": 5890 + }, + { + "epoch": 0.39820197377315125, + "grad_norm": 1.0050735473632812, + "learning_rate": 2.067235680824882e-05, + "loss": 0.2035064697265625, + "step": 5891 + }, + { + "epoch": 0.3982695687440854, + "grad_norm": 0.6831310987472534, + "learning_rate": 2.0669302159274474e-05, + "loss": 0.11641693115234375, + "step": 5892 + }, + { + "epoch": 0.3983371637150196, + "grad_norm": 0.39488866925239563, + "learning_rate": 2.0666247235984027e-05, + "loss": 0.0817413330078125, + "step": 5893 + }, + { + "epoch": 0.39840475868595376, + "grad_norm": 1.3611105680465698, + "learning_rate": 2.06631920385253e-05, + "loss": 0.1732177734375, + "step": 5894 + }, + { + "epoch": 0.39847235365688793, + "grad_norm": 0.2549769878387451, + "learning_rate": 2.0660136567046126e-05, + "loss": 0.048404693603515625, + "step": 5895 + }, + { + "epoch": 0.3985399486278221, + "grad_norm": 0.4425613284111023, + "learning_rate": 2.0657080821694347e-05, + "loss": 0.049579620361328125, + "step": 5896 + }, + { + "epoch": 0.3986075435987563, + "grad_norm": 0.5827382206916809, + "learning_rate": 2.065402480261781e-05, + "loss": 0.141632080078125, + "step": 5897 + }, + { + "epoch": 0.3986751385696904, + "grad_norm": 0.406465619802475, + "learning_rate": 2.0650968509964397e-05, + "loss": 0.036663055419921875, + "step": 5898 + }, + { + "epoch": 0.39874273354062456, + "grad_norm": 0.4549005925655365, + "learning_rate": 2.0647911943881986e-05, + "loss": 0.09128570556640625, + "step": 5899 + }, + { + "epoch": 0.39881032851155873, + "grad_norm": 0.4156472682952881, + "learning_rate": 2.0644855104518465e-05, + "loss": 0.096893310546875, + "step": 5900 + }, + { + "epoch": 0.3988779234824929, + "grad_norm": 0.9814301133155823, + "learning_rate": 2.0641797992021753e-05, + "loss": 0.164642333984375, + "step": 5901 + }, + { + "epoch": 0.3989455184534271, + "grad_norm": 0.41224727034568787, + "learning_rate": 2.0638740606539764e-05, + "loss": 0.06463623046875, + "step": 5902 + }, + { + "epoch": 0.39901311342436124, + "grad_norm": 0.3630877435207367, + "learning_rate": 2.0635682948220442e-05, + "loss": 0.0756072998046875, + "step": 5903 + }, + { + "epoch": 0.3990807083952954, + "grad_norm": 0.519629716873169, + "learning_rate": 2.0632625017211728e-05, + "loss": 0.12442779541015625, + "step": 5904 + }, + { + "epoch": 0.39914830336622953, + "grad_norm": 0.580055296421051, + "learning_rate": 2.062956681366159e-05, + "loss": 0.1257476806640625, + "step": 5905 + }, + { + "epoch": 0.3992158983371637, + "grad_norm": 0.31316789984703064, + "learning_rate": 2.0626508337717994e-05, + "loss": 0.077117919921875, + "step": 5906 + }, + { + "epoch": 0.39928349330809787, + "grad_norm": 0.5502747893333435, + "learning_rate": 2.062344958952894e-05, + "loss": 0.1443023681640625, + "step": 5907 + }, + { + "epoch": 0.39935108827903204, + "grad_norm": 0.3440399169921875, + "learning_rate": 2.0620390569242423e-05, + "loss": 0.061004638671875, + "step": 5908 + }, + { + "epoch": 0.3994186832499662, + "grad_norm": 0.25834518671035767, + "learning_rate": 2.0617331277006453e-05, + "loss": 0.04154205322265625, + "step": 5909 + }, + { + "epoch": 0.3994862782209004, + "grad_norm": 0.7109350562095642, + "learning_rate": 2.0614271712969062e-05, + "loss": 0.1167755126953125, + "step": 5910 + }, + { + "epoch": 0.39955387319183455, + "grad_norm": 0.7954093217849731, + "learning_rate": 2.0611211877278295e-05, + "loss": 0.172576904296875, + "step": 5911 + }, + { + "epoch": 0.39962146816276867, + "grad_norm": 0.5820196866989136, + "learning_rate": 2.06081517700822e-05, + "loss": 0.1470794677734375, + "step": 5912 + }, + { + "epoch": 0.39968906313370284, + "grad_norm": 0.29980775713920593, + "learning_rate": 2.0605091391528843e-05, + "loss": 0.0552520751953125, + "step": 5913 + }, + { + "epoch": 0.399756658104637, + "grad_norm": 0.43001386523246765, + "learning_rate": 2.060203074176631e-05, + "loss": 0.0992431640625, + "step": 5914 + }, + { + "epoch": 0.3998242530755712, + "grad_norm": 0.30571672320365906, + "learning_rate": 2.059896982094269e-05, + "loss": 0.0564727783203125, + "step": 5915 + }, + { + "epoch": 0.39989184804650535, + "grad_norm": 0.5652135610580444, + "learning_rate": 2.059590862920609e-05, + "loss": 0.0975189208984375, + "step": 5916 + }, + { + "epoch": 0.3999594430174395, + "grad_norm": 0.8918372392654419, + "learning_rate": 2.059284716670463e-05, + "loss": 0.1675262451171875, + "step": 5917 + }, + { + "epoch": 0.4000270379883737, + "grad_norm": 0.5010042190551758, + "learning_rate": 2.0589785433586445e-05, + "loss": 0.08759880065917969, + "step": 5918 + }, + { + "epoch": 0.4000946329593078, + "grad_norm": 0.860171914100647, + "learning_rate": 2.0586723429999678e-05, + "loss": 0.11429595947265625, + "step": 5919 + }, + { + "epoch": 0.400162227930242, + "grad_norm": 0.8264036178588867, + "learning_rate": 2.0583661156092483e-05, + "loss": 0.11701202392578125, + "step": 5920 + }, + { + "epoch": 0.40022982290117615, + "grad_norm": 0.4725465178489685, + "learning_rate": 2.058059861201304e-05, + "loss": 0.10106658935546875, + "step": 5921 + }, + { + "epoch": 0.4002974178721103, + "grad_norm": 0.34806114435195923, + "learning_rate": 2.057753579790953e-05, + "loss": 0.0720977783203125, + "step": 5922 + }, + { + "epoch": 0.4003650128430445, + "grad_norm": 0.7035598158836365, + "learning_rate": 2.0574472713930154e-05, + "loss": 0.2022705078125, + "step": 5923 + }, + { + "epoch": 0.40043260781397866, + "grad_norm": 0.22388626635074615, + "learning_rate": 2.0571409360223118e-05, + "loss": 0.04891395568847656, + "step": 5924 + }, + { + "epoch": 0.40050020278491283, + "grad_norm": 0.30955415964126587, + "learning_rate": 2.056834573693665e-05, + "loss": 0.0511016845703125, + "step": 5925 + }, + { + "epoch": 0.40056779775584694, + "grad_norm": 0.4983168840408325, + "learning_rate": 2.0565281844218988e-05, + "loss": 0.113861083984375, + "step": 5926 + }, + { + "epoch": 0.4006353927267811, + "grad_norm": 0.4550032317638397, + "learning_rate": 2.0562217682218375e-05, + "loss": 0.1012725830078125, + "step": 5927 + }, + { + "epoch": 0.4007029876977153, + "grad_norm": 0.7869526147842407, + "learning_rate": 2.0559153251083086e-05, + "loss": 0.1365814208984375, + "step": 5928 + }, + { + "epoch": 0.40077058266864946, + "grad_norm": 0.412228524684906, + "learning_rate": 2.0556088550961385e-05, + "loss": 0.06153106689453125, + "step": 5929 + }, + { + "epoch": 0.4008381776395836, + "grad_norm": 0.5253711938858032, + "learning_rate": 2.055302358200157e-05, + "loss": 0.088653564453125, + "step": 5930 + }, + { + "epoch": 0.4009057726105178, + "grad_norm": 0.7085134983062744, + "learning_rate": 2.054995834435194e-05, + "loss": 0.1536102294921875, + "step": 5931 + }, + { + "epoch": 0.40097336758145197, + "grad_norm": 1.105750560760498, + "learning_rate": 2.0546892838160812e-05, + "loss": 0.1857147216796875, + "step": 5932 + }, + { + "epoch": 0.4010409625523861, + "grad_norm": 0.6102815866470337, + "learning_rate": 2.0543827063576513e-05, + "loss": 0.12188720703125, + "step": 5933 + }, + { + "epoch": 0.40110855752332025, + "grad_norm": 0.5317314863204956, + "learning_rate": 2.054076102074738e-05, + "loss": 0.1279754638671875, + "step": 5934 + }, + { + "epoch": 0.4011761524942544, + "grad_norm": 0.6335155963897705, + "learning_rate": 2.0537694709821774e-05, + "loss": 0.14847564697265625, + "step": 5935 + }, + { + "epoch": 0.4012437474651886, + "grad_norm": 0.7579799294471741, + "learning_rate": 2.0534628130948056e-05, + "loss": 0.1323089599609375, + "step": 5936 + }, + { + "epoch": 0.40131134243612276, + "grad_norm": 1.0848782062530518, + "learning_rate": 2.0531561284274613e-05, + "loss": 0.186065673828125, + "step": 5937 + }, + { + "epoch": 0.40137893740705693, + "grad_norm": 0.45419925451278687, + "learning_rate": 2.0528494169949834e-05, + "loss": 0.06946563720703125, + "step": 5938 + }, + { + "epoch": 0.40144653237799105, + "grad_norm": 0.5230982303619385, + "learning_rate": 2.0525426788122127e-05, + "loss": 0.0716400146484375, + "step": 5939 + }, + { + "epoch": 0.4015141273489252, + "grad_norm": 0.44885674118995667, + "learning_rate": 2.0522359138939905e-05, + "loss": 0.0951995849609375, + "step": 5940 + }, + { + "epoch": 0.4015817223198594, + "grad_norm": 1.0490894317626953, + "learning_rate": 2.051929122255161e-05, + "loss": 0.1576385498046875, + "step": 5941 + }, + { + "epoch": 0.40164931729079356, + "grad_norm": 0.5457480549812317, + "learning_rate": 2.051622303910568e-05, + "loss": 0.116455078125, + "step": 5942 + }, + { + "epoch": 0.40171691226172773, + "grad_norm": 0.7599427700042725, + "learning_rate": 2.0513154588750575e-05, + "loss": 0.162841796875, + "step": 5943 + }, + { + "epoch": 0.4017845072326619, + "grad_norm": 0.43257758021354675, + "learning_rate": 2.0510085871634763e-05, + "loss": 0.0601806640625, + "step": 5944 + }, + { + "epoch": 0.4018521022035961, + "grad_norm": 0.5918706655502319, + "learning_rate": 2.050701688790673e-05, + "loss": 0.10840606689453125, + "step": 5945 + }, + { + "epoch": 0.4019196971745302, + "grad_norm": 0.621776819229126, + "learning_rate": 2.0503947637714972e-05, + "loss": 0.11311721801757812, + "step": 5946 + }, + { + "epoch": 0.40198729214546436, + "grad_norm": 0.9831531643867493, + "learning_rate": 2.0500878121208e-05, + "loss": 0.196197509765625, + "step": 5947 + }, + { + "epoch": 0.40205488711639853, + "grad_norm": 0.26938000321388245, + "learning_rate": 2.0497808338534333e-05, + "loss": 0.05507659912109375, + "step": 5948 + }, + { + "epoch": 0.4021224820873327, + "grad_norm": 1.1880141496658325, + "learning_rate": 2.049473828984251e-05, + "loss": 0.19378662109375, + "step": 5949 + }, + { + "epoch": 0.40219007705826687, + "grad_norm": 0.2211887240409851, + "learning_rate": 2.0491667975281076e-05, + "loss": 0.033275604248046875, + "step": 5950 + }, + { + "epoch": 0.40225767202920104, + "grad_norm": 0.36194249987602234, + "learning_rate": 2.0488597394998596e-05, + "loss": 0.04062652587890625, + "step": 5951 + }, + { + "epoch": 0.4023252670001352, + "grad_norm": 1.3812814950942993, + "learning_rate": 2.0485526549143637e-05, + "loss": 0.1486358642578125, + "step": 5952 + }, + { + "epoch": 0.4023928619710693, + "grad_norm": 0.5675753951072693, + "learning_rate": 2.0482455437864788e-05, + "loss": 0.1083831787109375, + "step": 5953 + }, + { + "epoch": 0.4024604569420035, + "grad_norm": 0.5787879228591919, + "learning_rate": 2.047938406131066e-05, + "loss": 0.1257781982421875, + "step": 5954 + }, + { + "epoch": 0.40252805191293767, + "grad_norm": 0.4124034643173218, + "learning_rate": 2.0476312419629845e-05, + "loss": 0.0578765869140625, + "step": 5955 + }, + { + "epoch": 0.40259564688387184, + "grad_norm": 0.5319423675537109, + "learning_rate": 2.047324051297098e-05, + "loss": 0.133758544921875, + "step": 5956 + }, + { + "epoch": 0.402663241854806, + "grad_norm": 0.5029650926589966, + "learning_rate": 2.04701683414827e-05, + "loss": 0.12526702880859375, + "step": 5957 + }, + { + "epoch": 0.4027308368257402, + "grad_norm": 0.8073399066925049, + "learning_rate": 2.046709590531366e-05, + "loss": 0.1626434326171875, + "step": 5958 + }, + { + "epoch": 0.40279843179667435, + "grad_norm": 0.3981330990791321, + "learning_rate": 2.0464023204612523e-05, + "loss": 0.062206268310546875, + "step": 5959 + }, + { + "epoch": 0.40286602676760846, + "grad_norm": 1.3621376752853394, + "learning_rate": 2.0460950239527957e-05, + "loss": 0.1715087890625, + "step": 5960 + }, + { + "epoch": 0.40293362173854264, + "grad_norm": 0.23790931701660156, + "learning_rate": 2.045787701020866e-05, + "loss": 0.04178905487060547, + "step": 5961 + }, + { + "epoch": 0.4030012167094768, + "grad_norm": 0.487602561712265, + "learning_rate": 2.0454803516803334e-05, + "loss": 0.11346435546875, + "step": 5962 + }, + { + "epoch": 0.403068811680411, + "grad_norm": 0.47459280490875244, + "learning_rate": 2.0451729759460686e-05, + "loss": 0.09060287475585938, + "step": 5963 + }, + { + "epoch": 0.40313640665134515, + "grad_norm": 0.44764286279678345, + "learning_rate": 2.0448655738329448e-05, + "loss": 0.06385040283203125, + "step": 5964 + }, + { + "epoch": 0.4032040016222793, + "grad_norm": 0.42716798186302185, + "learning_rate": 2.044558145355836e-05, + "loss": 0.0639495849609375, + "step": 5965 + }, + { + "epoch": 0.4032715965932135, + "grad_norm": 0.9857531785964966, + "learning_rate": 2.0442506905296186e-05, + "loss": 0.17600250244140625, + "step": 5966 + }, + { + "epoch": 0.4033391915641476, + "grad_norm": 0.9463210105895996, + "learning_rate": 2.0439432093691673e-05, + "loss": 0.2345123291015625, + "step": 5967 + }, + { + "epoch": 0.4034067865350818, + "grad_norm": 0.4125935137271881, + "learning_rate": 2.0436357018893607e-05, + "loss": 0.07407379150390625, + "step": 5968 + }, + { + "epoch": 0.40347438150601594, + "grad_norm": 0.3807199001312256, + "learning_rate": 2.0433281681050784e-05, + "loss": 0.05766105651855469, + "step": 5969 + }, + { + "epoch": 0.4035419764769501, + "grad_norm": 1.108945369720459, + "learning_rate": 2.0430206080312e-05, + "loss": 0.14472198486328125, + "step": 5970 + }, + { + "epoch": 0.4036095714478843, + "grad_norm": 0.8307713270187378, + "learning_rate": 2.0427130216826077e-05, + "loss": 0.08639907836914062, + "step": 5971 + }, + { + "epoch": 0.40367716641881846, + "grad_norm": 0.7101582884788513, + "learning_rate": 2.0424054090741844e-05, + "loss": 0.1482696533203125, + "step": 5972 + }, + { + "epoch": 0.4037447613897526, + "grad_norm": 0.4267736077308655, + "learning_rate": 2.042097770220814e-05, + "loss": 0.0818328857421875, + "step": 5973 + }, + { + "epoch": 0.40381235636068674, + "grad_norm": 0.5647891759872437, + "learning_rate": 2.0417901051373825e-05, + "loss": 0.1290283203125, + "step": 5974 + }, + { + "epoch": 0.4038799513316209, + "grad_norm": 0.3578048348426819, + "learning_rate": 2.0414824138387764e-05, + "loss": 0.06640625, + "step": 5975 + }, + { + "epoch": 0.4039475463025551, + "grad_norm": 0.39505016803741455, + "learning_rate": 2.0411746963398834e-05, + "loss": 0.0916290283203125, + "step": 5976 + }, + { + "epoch": 0.40401514127348925, + "grad_norm": 0.35042881965637207, + "learning_rate": 2.0408669526555933e-05, + "loss": 0.05840301513671875, + "step": 5977 + }, + { + "epoch": 0.4040827362444234, + "grad_norm": 0.4905414283275604, + "learning_rate": 2.0405591828007962e-05, + "loss": 0.12380218505859375, + "step": 5978 + }, + { + "epoch": 0.4041503312153576, + "grad_norm": 0.412842333316803, + "learning_rate": 2.0402513867903845e-05, + "loss": 0.0763702392578125, + "step": 5979 + }, + { + "epoch": 0.40421792618629176, + "grad_norm": 0.7208424210548401, + "learning_rate": 2.0399435646392505e-05, + "loss": 0.07573699951171875, + "step": 5980 + }, + { + "epoch": 0.4042855211572259, + "grad_norm": 0.592342734336853, + "learning_rate": 2.039635716362289e-05, + "loss": 0.1494140625, + "step": 5981 + }, + { + "epoch": 0.40435311612816005, + "grad_norm": 0.8026919960975647, + "learning_rate": 2.0393278419743958e-05, + "loss": 0.182647705078125, + "step": 5982 + }, + { + "epoch": 0.4044207110990942, + "grad_norm": 0.5663645267486572, + "learning_rate": 2.0390199414904677e-05, + "loss": 0.140838623046875, + "step": 5983 + }, + { + "epoch": 0.4044883060700284, + "grad_norm": 0.17406047880649567, + "learning_rate": 2.0387120149254024e-05, + "loss": 0.037548065185546875, + "step": 5984 + }, + { + "epoch": 0.40455590104096256, + "grad_norm": 1.4119117259979248, + "learning_rate": 2.0384040622941e-05, + "loss": 0.219451904296875, + "step": 5985 + }, + { + "epoch": 0.40462349601189673, + "grad_norm": 0.8798931837081909, + "learning_rate": 2.03809608361146e-05, + "loss": 0.15129852294921875, + "step": 5986 + }, + { + "epoch": 0.4046910909828309, + "grad_norm": 0.24318750202655792, + "learning_rate": 2.0377880788923853e-05, + "loss": 0.0370025634765625, + "step": 5987 + }, + { + "epoch": 0.404758685953765, + "grad_norm": 0.7849600315093994, + "learning_rate": 2.0374800481517793e-05, + "loss": 0.12353515625, + "step": 5988 + }, + { + "epoch": 0.4048262809246992, + "grad_norm": 1.160251498222351, + "learning_rate": 2.037171991404546e-05, + "loss": 0.230712890625, + "step": 5989 + }, + { + "epoch": 0.40489387589563336, + "grad_norm": 0.42447441816329956, + "learning_rate": 2.036863908665591e-05, + "loss": 0.08490753173828125, + "step": 5990 + }, + { + "epoch": 0.40496147086656753, + "grad_norm": 1.0647811889648438, + "learning_rate": 2.0365557999498213e-05, + "loss": 0.138153076171875, + "step": 5991 + }, + { + "epoch": 0.4050290658375017, + "grad_norm": 0.4733331501483917, + "learning_rate": 2.0362476652721453e-05, + "loss": 0.0687103271484375, + "step": 5992 + }, + { + "epoch": 0.40509666080843587, + "grad_norm": 0.23200276494026184, + "learning_rate": 2.0359395046474722e-05, + "loss": 0.04817771911621094, + "step": 5993 + }, + { + "epoch": 0.40516425577937004, + "grad_norm": 0.7839571833610535, + "learning_rate": 2.0356313180907128e-05, + "loss": 0.11577320098876953, + "step": 5994 + }, + { + "epoch": 0.40523185075030416, + "grad_norm": 0.37802883982658386, + "learning_rate": 2.0353231056167793e-05, + "loss": 0.0824737548828125, + "step": 5995 + }, + { + "epoch": 0.4052994457212383, + "grad_norm": 0.36459028720855713, + "learning_rate": 2.035014867240585e-05, + "loss": 0.051300048828125, + "step": 5996 + }, + { + "epoch": 0.4053670406921725, + "grad_norm": 0.7854253053665161, + "learning_rate": 2.0347066029770444e-05, + "loss": 0.12781143188476562, + "step": 5997 + }, + { + "epoch": 0.40543463566310667, + "grad_norm": 0.2522524297237396, + "learning_rate": 2.0343983128410724e-05, + "loss": 0.04865264892578125, + "step": 5998 + }, + { + "epoch": 0.40550223063404084, + "grad_norm": 0.8912851810455322, + "learning_rate": 2.0340899968475867e-05, + "loss": 0.1330890655517578, + "step": 5999 + }, + { + "epoch": 0.405569825604975, + "grad_norm": 1.0943855047225952, + "learning_rate": 2.033781655011506e-05, + "loss": 0.15695953369140625, + "step": 6000 + }, + { + "epoch": 0.4056374205759092, + "grad_norm": 1.5114514827728271, + "learning_rate": 2.033473287347749e-05, + "loss": 0.252838134765625, + "step": 6001 + }, + { + "epoch": 0.4057050155468433, + "grad_norm": 0.5576444864273071, + "learning_rate": 2.0331648938712366e-05, + "loss": 0.105682373046875, + "step": 6002 + }, + { + "epoch": 0.40577261051777747, + "grad_norm": 0.3673245906829834, + "learning_rate": 2.032856474596891e-05, + "loss": 0.05963134765625, + "step": 6003 + }, + { + "epoch": 0.40584020548871164, + "grad_norm": 0.4879542589187622, + "learning_rate": 2.0325480295396356e-05, + "loss": 0.070709228515625, + "step": 6004 + }, + { + "epoch": 0.4059078004596458, + "grad_norm": 0.7277565598487854, + "learning_rate": 2.0322395587143947e-05, + "loss": 0.187164306640625, + "step": 6005 + }, + { + "epoch": 0.40597539543058, + "grad_norm": 0.745761513710022, + "learning_rate": 2.0319310621360935e-05, + "loss": 0.14166259765625, + "step": 6006 + }, + { + "epoch": 0.40604299040151415, + "grad_norm": 0.3590554893016815, + "learning_rate": 2.0316225398196594e-05, + "loss": 0.0855712890625, + "step": 6007 + }, + { + "epoch": 0.40611058537244826, + "grad_norm": 0.33500581979751587, + "learning_rate": 2.0313139917800213e-05, + "loss": 0.0511627197265625, + "step": 6008 + }, + { + "epoch": 0.40617818034338243, + "grad_norm": 1.0459541082382202, + "learning_rate": 2.0310054180321078e-05, + "loss": 0.18621826171875, + "step": 6009 + }, + { + "epoch": 0.4062457753143166, + "grad_norm": 0.5651499032974243, + "learning_rate": 2.0306968185908502e-05, + "loss": 0.1390380859375, + "step": 6010 + }, + { + "epoch": 0.4063133702852508, + "grad_norm": 0.35590723156929016, + "learning_rate": 2.0303881934711793e-05, + "loss": 0.072174072265625, + "step": 6011 + }, + { + "epoch": 0.40638096525618495, + "grad_norm": 0.3090371787548065, + "learning_rate": 2.03007954268803e-05, + "loss": 0.059967041015625, + "step": 6012 + }, + { + "epoch": 0.4064485602271191, + "grad_norm": 0.37590330839157104, + "learning_rate": 2.0297708662563353e-05, + "loss": 0.05431365966796875, + "step": 6013 + }, + { + "epoch": 0.4065161551980533, + "grad_norm": 0.7110050916671753, + "learning_rate": 2.029462164191032e-05, + "loss": 0.175537109375, + "step": 6014 + }, + { + "epoch": 0.4065837501689874, + "grad_norm": 0.305152028799057, + "learning_rate": 2.0291534365070563e-05, + "loss": 0.049922943115234375, + "step": 6015 + }, + { + "epoch": 0.40665134513992157, + "grad_norm": 0.6802307367324829, + "learning_rate": 2.0288446832193465e-05, + "loss": 0.130859375, + "step": 6016 + }, + { + "epoch": 0.40671894011085574, + "grad_norm": 0.4842284321784973, + "learning_rate": 2.028535904342842e-05, + "loss": 0.1129913330078125, + "step": 6017 + }, + { + "epoch": 0.4067865350817899, + "grad_norm": 0.2876521348953247, + "learning_rate": 2.028227099892484e-05, + "loss": 0.04013824462890625, + "step": 6018 + }, + { + "epoch": 0.4068541300527241, + "grad_norm": 1.4515434503555298, + "learning_rate": 2.0279182698832127e-05, + "loss": 0.14980316162109375, + "step": 6019 + }, + { + "epoch": 0.40692172502365825, + "grad_norm": 0.5649705529212952, + "learning_rate": 2.0276094143299734e-05, + "loss": 0.099395751953125, + "step": 6020 + }, + { + "epoch": 0.4069893199945924, + "grad_norm": 0.5594387650489807, + "learning_rate": 2.0273005332477096e-05, + "loss": 0.1182861328125, + "step": 6021 + }, + { + "epoch": 0.40705691496552654, + "grad_norm": 0.4725598692893982, + "learning_rate": 2.0269916266513666e-05, + "loss": 0.0674285888671875, + "step": 6022 + }, + { + "epoch": 0.4071245099364607, + "grad_norm": 0.7975713610649109, + "learning_rate": 2.026682694555891e-05, + "loss": 0.1315460205078125, + "step": 6023 + }, + { + "epoch": 0.4071921049073949, + "grad_norm": 0.5736833214759827, + "learning_rate": 2.0263737369762318e-05, + "loss": 0.0961456298828125, + "step": 6024 + }, + { + "epoch": 0.40725969987832905, + "grad_norm": 1.108867883682251, + "learning_rate": 2.0260647539273374e-05, + "loss": 0.15983963012695312, + "step": 6025 + }, + { + "epoch": 0.4073272948492632, + "grad_norm": 0.5130767822265625, + "learning_rate": 2.0257557454241584e-05, + "loss": 0.10029983520507812, + "step": 6026 + }, + { + "epoch": 0.4073948898201974, + "grad_norm": 0.4486792981624603, + "learning_rate": 2.025446711481647e-05, + "loss": 0.0712432861328125, + "step": 6027 + }, + { + "epoch": 0.40746248479113156, + "grad_norm": 0.24359652400016785, + "learning_rate": 2.025137652114756e-05, + "loss": 0.0490264892578125, + "step": 6028 + }, + { + "epoch": 0.4075300797620657, + "grad_norm": 0.27789247035980225, + "learning_rate": 2.0248285673384396e-05, + "loss": 0.0562286376953125, + "step": 6029 + }, + { + "epoch": 0.40759767473299985, + "grad_norm": 0.34845060110092163, + "learning_rate": 2.0245194571676533e-05, + "loss": 0.047664642333984375, + "step": 6030 + }, + { + "epoch": 0.407665269703934, + "grad_norm": 0.6991007328033447, + "learning_rate": 2.0242103216173537e-05, + "loss": 0.15460205078125, + "step": 6031 + }, + { + "epoch": 0.4077328646748682, + "grad_norm": 0.6851725578308105, + "learning_rate": 2.0239011607024983e-05, + "loss": 0.15999603271484375, + "step": 6032 + }, + { + "epoch": 0.40780045964580236, + "grad_norm": 0.29101330041885376, + "learning_rate": 2.0235919744380475e-05, + "loss": 0.047229766845703125, + "step": 6033 + }, + { + "epoch": 0.40786805461673653, + "grad_norm": 0.6357691884040833, + "learning_rate": 2.0232827628389597e-05, + "loss": 0.1458282470703125, + "step": 6034 + }, + { + "epoch": 0.4079356495876707, + "grad_norm": 0.8979963660240173, + "learning_rate": 2.0229735259201988e-05, + "loss": 0.227783203125, + "step": 6035 + }, + { + "epoch": 0.4080032445586048, + "grad_norm": 0.22265534102916718, + "learning_rate": 2.0226642636967254e-05, + "loss": 0.03208160400390625, + "step": 6036 + }, + { + "epoch": 0.408070839529539, + "grad_norm": 0.6554054021835327, + "learning_rate": 2.022354976183505e-05, + "loss": 0.1438446044921875, + "step": 6037 + }, + { + "epoch": 0.40813843450047316, + "grad_norm": 1.0875728130340576, + "learning_rate": 2.0220456633955023e-05, + "loss": 0.189666748046875, + "step": 6038 + }, + { + "epoch": 0.40820602947140733, + "grad_norm": 0.9955306053161621, + "learning_rate": 2.0217363253476838e-05, + "loss": 0.2115478515625, + "step": 6039 + }, + { + "epoch": 0.4082736244423415, + "grad_norm": 0.24021349847316742, + "learning_rate": 2.021426962055017e-05, + "loss": 0.0469818115234375, + "step": 6040 + }, + { + "epoch": 0.40834121941327567, + "grad_norm": 0.9503873586654663, + "learning_rate": 2.0211175735324713e-05, + "loss": 0.11889266967773438, + "step": 6041 + }, + { + "epoch": 0.40840881438420984, + "grad_norm": 0.17041419446468353, + "learning_rate": 2.0208081597950165e-05, + "loss": 0.03289365768432617, + "step": 6042 + }, + { + "epoch": 0.40847640935514395, + "grad_norm": 0.48439496755599976, + "learning_rate": 2.0204987208576244e-05, + "loss": 0.06528663635253906, + "step": 6043 + }, + { + "epoch": 0.4085440043260781, + "grad_norm": 1.222747802734375, + "learning_rate": 2.0201892567352665e-05, + "loss": 0.13553237915039062, + "step": 6044 + }, + { + "epoch": 0.4086115992970123, + "grad_norm": 0.366765558719635, + "learning_rate": 2.0198797674429177e-05, + "loss": 0.051914215087890625, + "step": 6045 + }, + { + "epoch": 0.40867919426794647, + "grad_norm": 1.0942248106002808, + "learning_rate": 2.019570252995553e-05, + "loss": 0.207916259765625, + "step": 6046 + }, + { + "epoch": 0.40874678923888064, + "grad_norm": 0.43140217661857605, + "learning_rate": 2.0192607134081483e-05, + "loss": 0.10189437866210938, + "step": 6047 + }, + { + "epoch": 0.4088143842098148, + "grad_norm": 0.22569729387760162, + "learning_rate": 2.0189511486956806e-05, + "loss": 0.032894134521484375, + "step": 6048 + }, + { + "epoch": 0.408881979180749, + "grad_norm": 0.3213609457015991, + "learning_rate": 2.018641558873129e-05, + "loss": 0.043521881103515625, + "step": 6049 + }, + { + "epoch": 0.4089495741516831, + "grad_norm": 1.098210096359253, + "learning_rate": 2.0183319439554737e-05, + "loss": 0.1536407470703125, + "step": 6050 + }, + { + "epoch": 0.40901716912261726, + "grad_norm": 0.5581879615783691, + "learning_rate": 2.0180223039576954e-05, + "loss": 0.1225128173828125, + "step": 6051 + }, + { + "epoch": 0.40908476409355143, + "grad_norm": 0.631705105304718, + "learning_rate": 2.017712638894776e-05, + "loss": 0.10174560546875, + "step": 6052 + }, + { + "epoch": 0.4091523590644856, + "grad_norm": 0.8651372194290161, + "learning_rate": 2.0174029487817e-05, + "loss": 0.1389312744140625, + "step": 6053 + }, + { + "epoch": 0.4092199540354198, + "grad_norm": 1.1667194366455078, + "learning_rate": 2.0170932336334518e-05, + "loss": 0.083953857421875, + "step": 6054 + }, + { + "epoch": 0.40928754900635395, + "grad_norm": 0.6224724054336548, + "learning_rate": 2.0167834934650167e-05, + "loss": 0.1186065673828125, + "step": 6055 + }, + { + "epoch": 0.4093551439772881, + "grad_norm": 0.7406361103057861, + "learning_rate": 2.0164737282913827e-05, + "loss": 0.08177947998046875, + "step": 6056 + }, + { + "epoch": 0.40942273894822223, + "grad_norm": 0.8562703728675842, + "learning_rate": 2.0161639381275378e-05, + "loss": 0.149658203125, + "step": 6057 + }, + { + "epoch": 0.4094903339191564, + "grad_norm": 0.4410417079925537, + "learning_rate": 2.0158541229884714e-05, + "loss": 0.103302001953125, + "step": 6058 + }, + { + "epoch": 0.4095579288900906, + "grad_norm": 0.29170602560043335, + "learning_rate": 2.015544282889175e-05, + "loss": 0.05029487609863281, + "step": 6059 + }, + { + "epoch": 0.40962552386102474, + "grad_norm": 0.9158741235733032, + "learning_rate": 2.0152344178446397e-05, + "loss": 0.18865966796875, + "step": 6060 + }, + { + "epoch": 0.4096931188319589, + "grad_norm": 1.1221611499786377, + "learning_rate": 2.0149245278698586e-05, + "loss": 0.167877197265625, + "step": 6061 + }, + { + "epoch": 0.4097607138028931, + "grad_norm": 0.33451008796691895, + "learning_rate": 2.0146146129798268e-05, + "loss": 0.06298065185546875, + "step": 6062 + }, + { + "epoch": 0.40982830877382725, + "grad_norm": 0.2671951949596405, + "learning_rate": 2.01430467318954e-05, + "loss": 0.035064697265625, + "step": 6063 + }, + { + "epoch": 0.40989590374476137, + "grad_norm": 0.38015124201774597, + "learning_rate": 2.0139947085139946e-05, + "loss": 0.0937347412109375, + "step": 6064 + }, + { + "epoch": 0.40996349871569554, + "grad_norm": 0.9474417567253113, + "learning_rate": 2.013684718968188e-05, + "loss": 0.1309356689453125, + "step": 6065 + }, + { + "epoch": 0.4100310936866297, + "grad_norm": 1.938194990158081, + "learning_rate": 2.0133747045671212e-05, + "loss": 0.28533935546875, + "step": 6066 + }, + { + "epoch": 0.4100986886575639, + "grad_norm": 0.7656173706054688, + "learning_rate": 2.013064665325793e-05, + "loss": 0.09814834594726562, + "step": 6067 + }, + { + "epoch": 0.41016628362849805, + "grad_norm": 0.5332352519035339, + "learning_rate": 2.0127546012592055e-05, + "loss": 0.10889434814453125, + "step": 6068 + }, + { + "epoch": 0.4102338785994322, + "grad_norm": 0.4963589906692505, + "learning_rate": 2.012444512382362e-05, + "loss": 0.0690460205078125, + "step": 6069 + }, + { + "epoch": 0.41030147357036634, + "grad_norm": 0.36501702666282654, + "learning_rate": 2.0121343987102657e-05, + "loss": 0.042690277099609375, + "step": 6070 + }, + { + "epoch": 0.4103690685413005, + "grad_norm": 0.9767955541610718, + "learning_rate": 2.011824260257923e-05, + "loss": 0.1371917724609375, + "step": 6071 + }, + { + "epoch": 0.4104366635122347, + "grad_norm": 0.29588058590888977, + "learning_rate": 2.0115140970403387e-05, + "loss": 0.05316162109375, + "step": 6072 + }, + { + "epoch": 0.41050425848316885, + "grad_norm": 0.6218798756599426, + "learning_rate": 2.0112039090725214e-05, + "loss": 0.1313629150390625, + "step": 6073 + }, + { + "epoch": 0.410571853454103, + "grad_norm": 0.5445687174797058, + "learning_rate": 2.01089369636948e-05, + "loss": 0.1209716796875, + "step": 6074 + }, + { + "epoch": 0.4106394484250372, + "grad_norm": 0.39519035816192627, + "learning_rate": 2.010583458946225e-05, + "loss": 0.073089599609375, + "step": 6075 + }, + { + "epoch": 0.41070704339597136, + "grad_norm": 0.2602115273475647, + "learning_rate": 2.010273196817766e-05, + "loss": 0.060546875, + "step": 6076 + }, + { + "epoch": 0.4107746383669055, + "grad_norm": 0.22520498931407928, + "learning_rate": 2.0099629099991168e-05, + "loss": 0.038234710693359375, + "step": 6077 + }, + { + "epoch": 0.41084223333783965, + "grad_norm": 0.5294480323791504, + "learning_rate": 2.0096525985052905e-05, + "loss": 0.1187744140625, + "step": 6078 + }, + { + "epoch": 0.4109098283087738, + "grad_norm": 0.5913657546043396, + "learning_rate": 2.009342262351302e-05, + "loss": 0.10585403442382812, + "step": 6079 + }, + { + "epoch": 0.410977423279708, + "grad_norm": 0.33659884333610535, + "learning_rate": 2.0090319015521674e-05, + "loss": 0.07523345947265625, + "step": 6080 + }, + { + "epoch": 0.41104501825064216, + "grad_norm": 0.4682588577270508, + "learning_rate": 2.008721516122904e-05, + "loss": 0.119232177734375, + "step": 6081 + }, + { + "epoch": 0.41111261322157633, + "grad_norm": 0.7079012989997864, + "learning_rate": 2.0084111060785293e-05, + "loss": 0.2122650146484375, + "step": 6082 + }, + { + "epoch": 0.4111802081925105, + "grad_norm": 0.29700711369514465, + "learning_rate": 2.0081006714340645e-05, + "loss": 0.0563201904296875, + "step": 6083 + }, + { + "epoch": 0.4112478031634446, + "grad_norm": 0.5618225336074829, + "learning_rate": 2.0077902122045286e-05, + "loss": 0.1298065185546875, + "step": 6084 + }, + { + "epoch": 0.4113153981343788, + "grad_norm": 0.8365662693977356, + "learning_rate": 2.0074797284049444e-05, + "loss": 0.15118408203125, + "step": 6085 + }, + { + "epoch": 0.41138299310531296, + "grad_norm": 0.6501104831695557, + "learning_rate": 2.007169220050335e-05, + "loss": 0.1309814453125, + "step": 6086 + }, + { + "epoch": 0.4114505880762471, + "grad_norm": 0.43243008852005005, + "learning_rate": 2.0068586871557246e-05, + "loss": 0.06675338745117188, + "step": 6087 + }, + { + "epoch": 0.4115181830471813, + "grad_norm": 1.0940625667572021, + "learning_rate": 2.0065481297361386e-05, + "loss": 0.1665802001953125, + "step": 6088 + }, + { + "epoch": 0.41158577801811547, + "grad_norm": 0.5577008724212646, + "learning_rate": 2.0062375478066044e-05, + "loss": 0.10986328125, + "step": 6089 + }, + { + "epoch": 0.41165337298904964, + "grad_norm": 0.891960620880127, + "learning_rate": 2.0059269413821492e-05, + "loss": 0.1323089599609375, + "step": 6090 + }, + { + "epoch": 0.41172096795998375, + "grad_norm": 0.9750412106513977, + "learning_rate": 2.005616310477802e-05, + "loss": 0.232177734375, + "step": 6091 + }, + { + "epoch": 0.4117885629309179, + "grad_norm": 0.4139559864997864, + "learning_rate": 2.0053056551085937e-05, + "loss": 0.0676727294921875, + "step": 6092 + }, + { + "epoch": 0.4118561579018521, + "grad_norm": 0.6357114315032959, + "learning_rate": 2.0049949752895552e-05, + "loss": 0.099884033203125, + "step": 6093 + }, + { + "epoch": 0.41192375287278626, + "grad_norm": 0.28575775027275085, + "learning_rate": 2.0046842710357192e-05, + "loss": 0.04395484924316406, + "step": 6094 + }, + { + "epoch": 0.41199134784372043, + "grad_norm": 2.964301586151123, + "learning_rate": 2.00437354236212e-05, + "loss": 0.253875732421875, + "step": 6095 + }, + { + "epoch": 0.4120589428146546, + "grad_norm": 1.1787991523742676, + "learning_rate": 2.0040627892837914e-05, + "loss": 0.1536865234375, + "step": 6096 + }, + { + "epoch": 0.4121265377855888, + "grad_norm": 1.73300039768219, + "learning_rate": 2.003752011815771e-05, + "loss": 0.19518280029296875, + "step": 6097 + }, + { + "epoch": 0.4121941327565229, + "grad_norm": 0.37412458658218384, + "learning_rate": 2.003441209973095e-05, + "loss": 0.061920166015625, + "step": 6098 + }, + { + "epoch": 0.41226172772745706, + "grad_norm": 0.751863956451416, + "learning_rate": 2.0031303837708027e-05, + "loss": 0.11850738525390625, + "step": 6099 + }, + { + "epoch": 0.41232932269839123, + "grad_norm": 0.47849202156066895, + "learning_rate": 2.0028195332239335e-05, + "loss": 0.0986480712890625, + "step": 6100 + }, + { + "epoch": 0.4123969176693254, + "grad_norm": 0.29716184735298157, + "learning_rate": 2.002508658347528e-05, + "loss": 0.040771484375, + "step": 6101 + }, + { + "epoch": 0.4124645126402596, + "grad_norm": 0.7914508581161499, + "learning_rate": 2.0021977591566285e-05, + "loss": 0.178863525390625, + "step": 6102 + }, + { + "epoch": 0.41253210761119374, + "grad_norm": 0.9186586141586304, + "learning_rate": 2.0018868356662784e-05, + "loss": 0.226593017578125, + "step": 6103 + }, + { + "epoch": 0.4125997025821279, + "grad_norm": 0.5337146520614624, + "learning_rate": 2.0015758878915217e-05, + "loss": 0.0907745361328125, + "step": 6104 + }, + { + "epoch": 0.41266729755306203, + "grad_norm": 0.1991415023803711, + "learning_rate": 2.001264915847405e-05, + "loss": 0.03192901611328125, + "step": 6105 + }, + { + "epoch": 0.4127348925239962, + "grad_norm": 0.43771281838417053, + "learning_rate": 2.000953919548974e-05, + "loss": 0.1145172119140625, + "step": 6106 + }, + { + "epoch": 0.41280248749493037, + "grad_norm": 0.2979854643344879, + "learning_rate": 2.000642899011277e-05, + "loss": 0.03968048095703125, + "step": 6107 + }, + { + "epoch": 0.41287008246586454, + "grad_norm": 0.5737060308456421, + "learning_rate": 2.0003318542493633e-05, + "loss": 0.146942138671875, + "step": 6108 + }, + { + "epoch": 0.4129376774367987, + "grad_norm": 1.1824978590011597, + "learning_rate": 2.0000207852782825e-05, + "loss": 0.231231689453125, + "step": 6109 + }, + { + "epoch": 0.4130052724077329, + "grad_norm": 0.2730104327201843, + "learning_rate": 1.9997096921130865e-05, + "loss": 0.053679466247558594, + "step": 6110 + }, + { + "epoch": 0.41307286737866705, + "grad_norm": 0.47393760085105896, + "learning_rate": 1.999398574768828e-05, + "loss": 0.08202362060546875, + "step": 6111 + }, + { + "epoch": 0.41314046234960117, + "grad_norm": 0.6446714997291565, + "learning_rate": 1.999087433260561e-05, + "loss": 0.12605667114257812, + "step": 6112 + }, + { + "epoch": 0.41320805732053534, + "grad_norm": 1.397969126701355, + "learning_rate": 1.99877626760334e-05, + "loss": 0.26513671875, + "step": 6113 + }, + { + "epoch": 0.4132756522914695, + "grad_norm": 0.43083447217941284, + "learning_rate": 1.9984650778122214e-05, + "loss": 0.054790496826171875, + "step": 6114 + }, + { + "epoch": 0.4133432472624037, + "grad_norm": 0.7648603320121765, + "learning_rate": 1.9981538639022624e-05, + "loss": 0.15504837036132812, + "step": 6115 + }, + { + "epoch": 0.41341084223333785, + "grad_norm": 1.0147569179534912, + "learning_rate": 1.997842625888521e-05, + "loss": 0.16594696044921875, + "step": 6116 + }, + { + "epoch": 0.413478437204272, + "grad_norm": 0.44096383452415466, + "learning_rate": 1.9975313637860577e-05, + "loss": 0.04611968994140625, + "step": 6117 + }, + { + "epoch": 0.4135460321752062, + "grad_norm": 0.72667396068573, + "learning_rate": 1.9972200776099337e-05, + "loss": 0.16161346435546875, + "step": 6118 + }, + { + "epoch": 0.4136136271461403, + "grad_norm": 0.83379727602005, + "learning_rate": 1.996908767375209e-05, + "loss": 0.162353515625, + "step": 6119 + }, + { + "epoch": 0.4136812221170745, + "grad_norm": 1.2826259136199951, + "learning_rate": 1.9965974330969485e-05, + "loss": 0.237945556640625, + "step": 6120 + }, + { + "epoch": 0.41374881708800865, + "grad_norm": 1.0710124969482422, + "learning_rate": 1.9962860747902153e-05, + "loss": 0.1274566650390625, + "step": 6121 + }, + { + "epoch": 0.4138164120589428, + "grad_norm": 0.27464568614959717, + "learning_rate": 1.9959746924700763e-05, + "loss": 0.052227020263671875, + "step": 6122 + }, + { + "epoch": 0.413884007029877, + "grad_norm": 1.5326567888259888, + "learning_rate": 1.9956632861515966e-05, + "loss": 0.220550537109375, + "step": 6123 + }, + { + "epoch": 0.41395160200081116, + "grad_norm": 0.5999363660812378, + "learning_rate": 1.9953518558498445e-05, + "loss": 0.129547119140625, + "step": 6124 + }, + { + "epoch": 0.41401919697174533, + "grad_norm": 0.7415916323661804, + "learning_rate": 1.9950404015798895e-05, + "loss": 0.10758209228515625, + "step": 6125 + }, + { + "epoch": 0.41408679194267944, + "grad_norm": 0.38237711787223816, + "learning_rate": 1.994728923356801e-05, + "loss": 0.070098876953125, + "step": 6126 + }, + { + "epoch": 0.4141543869136136, + "grad_norm": 0.463605135679245, + "learning_rate": 1.994417421195651e-05, + "loss": 0.084991455078125, + "step": 6127 + }, + { + "epoch": 0.4142219818845478, + "grad_norm": 0.3697888255119324, + "learning_rate": 1.9941058951115105e-05, + "loss": 0.05576324462890625, + "step": 6128 + }, + { + "epoch": 0.41428957685548196, + "grad_norm": 1.2208172082901, + "learning_rate": 1.9937943451194548e-05, + "loss": 0.215667724609375, + "step": 6129 + }, + { + "epoch": 0.4143571718264161, + "grad_norm": 0.3560745120048523, + "learning_rate": 1.9934827712345576e-05, + "loss": 0.0707550048828125, + "step": 6130 + }, + { + "epoch": 0.4144247667973503, + "grad_norm": 1.2220336198806763, + "learning_rate": 1.993171173471895e-05, + "loss": 0.19355010986328125, + "step": 6131 + }, + { + "epoch": 0.4144923617682844, + "grad_norm": 0.649437665939331, + "learning_rate": 1.992859551846544e-05, + "loss": 0.130157470703125, + "step": 6132 + }, + { + "epoch": 0.4145599567392186, + "grad_norm": 0.22030594944953918, + "learning_rate": 1.9925479063735824e-05, + "loss": 0.029109954833984375, + "step": 6133 + }, + { + "epoch": 0.41462755171015275, + "grad_norm": 0.2822883427143097, + "learning_rate": 1.9922362370680903e-05, + "loss": 0.06890106201171875, + "step": 6134 + }, + { + "epoch": 0.4146951466810869, + "grad_norm": 0.3249031603336334, + "learning_rate": 1.991924543945148e-05, + "loss": 0.06542205810546875, + "step": 6135 + }, + { + "epoch": 0.4147627416520211, + "grad_norm": 0.47968974709510803, + "learning_rate": 1.9916128270198368e-05, + "loss": 0.11734771728515625, + "step": 6136 + }, + { + "epoch": 0.41483033662295526, + "grad_norm": 0.28634119033813477, + "learning_rate": 1.99130108630724e-05, + "loss": 0.052753448486328125, + "step": 6137 + }, + { + "epoch": 0.41489793159388944, + "grad_norm": 0.5932565927505493, + "learning_rate": 1.9909893218224406e-05, + "loss": 0.1000213623046875, + "step": 6138 + }, + { + "epoch": 0.41496552656482355, + "grad_norm": 0.49662086367607117, + "learning_rate": 1.9906775335805252e-05, + "loss": 0.07878875732421875, + "step": 6139 + }, + { + "epoch": 0.4150331215357577, + "grad_norm": 0.7561150193214417, + "learning_rate": 1.9903657215965788e-05, + "loss": 0.13251113891601562, + "step": 6140 + }, + { + "epoch": 0.4151007165066919, + "grad_norm": 0.2868497669696808, + "learning_rate": 1.9900538858856895e-05, + "loss": 0.0602569580078125, + "step": 6141 + }, + { + "epoch": 0.41516831147762606, + "grad_norm": 0.7615206837654114, + "learning_rate": 1.9897420264629456e-05, + "loss": 0.171783447265625, + "step": 6142 + }, + { + "epoch": 0.41523590644856023, + "grad_norm": 0.2726419270038605, + "learning_rate": 1.989430143343437e-05, + "loss": 0.04126739501953125, + "step": 6143 + }, + { + "epoch": 0.4153035014194944, + "grad_norm": 0.14616838097572327, + "learning_rate": 1.989118236542253e-05, + "loss": 0.013312816619873047, + "step": 6144 + }, + { + "epoch": 0.4153710963904286, + "grad_norm": 0.41113579273223877, + "learning_rate": 1.9888063060744882e-05, + "loss": 0.05503082275390625, + "step": 6145 + }, + { + "epoch": 0.4154386913613627, + "grad_norm": 0.7472376227378845, + "learning_rate": 1.988494351955234e-05, + "loss": 0.15277099609375, + "step": 6146 + }, + { + "epoch": 0.41550628633229686, + "grad_norm": 1.6471680402755737, + "learning_rate": 1.9881823741995854e-05, + "loss": 0.221282958984375, + "step": 6147 + }, + { + "epoch": 0.41557388130323103, + "grad_norm": 0.7382670640945435, + "learning_rate": 1.9878703728226376e-05, + "loss": 0.15618896484375, + "step": 6148 + }, + { + "epoch": 0.4156414762741652, + "grad_norm": 0.6221492886543274, + "learning_rate": 1.9875583478394868e-05, + "loss": 0.08390045166015625, + "step": 6149 + }, + { + "epoch": 0.41570907124509937, + "grad_norm": 0.456689715385437, + "learning_rate": 1.987246299265231e-05, + "loss": 0.10765838623046875, + "step": 6150 + }, + { + "epoch": 0.41577666621603354, + "grad_norm": 0.4403458833694458, + "learning_rate": 1.9869342271149695e-05, + "loss": 0.105072021484375, + "step": 6151 + }, + { + "epoch": 0.4158442611869677, + "grad_norm": 0.4220757782459259, + "learning_rate": 1.9866221314038018e-05, + "loss": 0.06441116333007812, + "step": 6152 + }, + { + "epoch": 0.4159118561579018, + "grad_norm": 0.45783495903015137, + "learning_rate": 1.986310012146829e-05, + "loss": 0.1067047119140625, + "step": 6153 + }, + { + "epoch": 0.415979451128836, + "grad_norm": 0.6638703942298889, + "learning_rate": 1.9859978693591532e-05, + "loss": 0.12085485458374023, + "step": 6154 + }, + { + "epoch": 0.41604704609977017, + "grad_norm": 1.1084436178207397, + "learning_rate": 1.9856857030558786e-05, + "loss": 0.18511962890625, + "step": 6155 + }, + { + "epoch": 0.41611464107070434, + "grad_norm": 0.6726573705673218, + "learning_rate": 1.9853735132521088e-05, + "loss": 0.13919830322265625, + "step": 6156 + }, + { + "epoch": 0.4161822360416385, + "grad_norm": 0.37758669257164, + "learning_rate": 1.9850612999629503e-05, + "loss": 0.0554962158203125, + "step": 6157 + }, + { + "epoch": 0.4162498310125727, + "grad_norm": 0.44951075315475464, + "learning_rate": 1.9847490632035093e-05, + "loss": 0.0750579833984375, + "step": 6158 + }, + { + "epoch": 0.41631742598350685, + "grad_norm": 0.6875868439674377, + "learning_rate": 1.984436802988894e-05, + "loss": 0.1519927978515625, + "step": 6159 + }, + { + "epoch": 0.41638502095444097, + "grad_norm": 1.2098954916000366, + "learning_rate": 1.9841245193342137e-05, + "loss": 0.1703033447265625, + "step": 6160 + }, + { + "epoch": 0.41645261592537514, + "grad_norm": 0.47042685747146606, + "learning_rate": 1.9838122122545782e-05, + "loss": 0.0588226318359375, + "step": 6161 + }, + { + "epoch": 0.4165202108963093, + "grad_norm": 0.616154670715332, + "learning_rate": 1.9834998817650987e-05, + "loss": 0.14813232421875, + "step": 6162 + }, + { + "epoch": 0.4165878058672435, + "grad_norm": 1.0205731391906738, + "learning_rate": 1.9831875278808888e-05, + "loss": 0.18452072143554688, + "step": 6163 + }, + { + "epoch": 0.41665540083817765, + "grad_norm": 0.2523983120918274, + "learning_rate": 1.9828751506170614e-05, + "loss": 0.046291351318359375, + "step": 6164 + }, + { + "epoch": 0.4167229958091118, + "grad_norm": 0.6361353993415833, + "learning_rate": 1.9825627499887308e-05, + "loss": 0.09893798828125, + "step": 6165 + }, + { + "epoch": 0.416790590780046, + "grad_norm": 0.5673279762268066, + "learning_rate": 1.9822503260110134e-05, + "loss": 0.11819076538085938, + "step": 6166 + }, + { + "epoch": 0.4168581857509801, + "grad_norm": 0.5692901015281677, + "learning_rate": 1.9819378786990263e-05, + "loss": 0.09656524658203125, + "step": 6167 + }, + { + "epoch": 0.4169257807219143, + "grad_norm": 0.5650380849838257, + "learning_rate": 1.9816254080678875e-05, + "loss": 0.150665283203125, + "step": 6168 + }, + { + "epoch": 0.41699337569284844, + "grad_norm": 0.5064266324043274, + "learning_rate": 1.981312914132716e-05, + "loss": 0.11124038696289062, + "step": 6169 + }, + { + "epoch": 0.4170609706637826, + "grad_norm": 1.1098884344100952, + "learning_rate": 1.9810003969086326e-05, + "loss": 0.2099609375, + "step": 6170 + }, + { + "epoch": 0.4171285656347168, + "grad_norm": 0.19184009730815887, + "learning_rate": 1.980687856410759e-05, + "loss": 0.0292816162109375, + "step": 6171 + }, + { + "epoch": 0.41719616060565096, + "grad_norm": 0.3134273290634155, + "learning_rate": 1.9803752926542172e-05, + "loss": 0.045948028564453125, + "step": 6172 + }, + { + "epoch": 0.4172637555765851, + "grad_norm": 0.43645742535591125, + "learning_rate": 1.980062705654131e-05, + "loss": 0.10699462890625, + "step": 6173 + }, + { + "epoch": 0.41733135054751924, + "grad_norm": 0.45583799481391907, + "learning_rate": 1.979750095425626e-05, + "loss": 0.1032867431640625, + "step": 6174 + }, + { + "epoch": 0.4173989455184534, + "grad_norm": 0.602023184299469, + "learning_rate": 1.979437461983828e-05, + "loss": 0.10136795043945312, + "step": 6175 + }, + { + "epoch": 0.4174665404893876, + "grad_norm": 0.6426481008529663, + "learning_rate": 1.979124805343864e-05, + "loss": 0.10936737060546875, + "step": 6176 + }, + { + "epoch": 0.41753413546032175, + "grad_norm": 0.3245343565940857, + "learning_rate": 1.9788121255208623e-05, + "loss": 0.07152557373046875, + "step": 6177 + }, + { + "epoch": 0.4176017304312559, + "grad_norm": 0.8954101204872131, + "learning_rate": 1.978499422529952e-05, + "loss": 0.1623687744140625, + "step": 6178 + }, + { + "epoch": 0.4176693254021901, + "grad_norm": 0.3533841371536255, + "learning_rate": 1.978186696386264e-05, + "loss": 0.06194305419921875, + "step": 6179 + }, + { + "epoch": 0.41773692037312427, + "grad_norm": 0.6307495832443237, + "learning_rate": 1.9778739471049297e-05, + "loss": 0.156005859375, + "step": 6180 + }, + { + "epoch": 0.4178045153440584, + "grad_norm": 0.9598676562309265, + "learning_rate": 1.9775611747010822e-05, + "loss": 0.14559173583984375, + "step": 6181 + }, + { + "epoch": 0.41787211031499255, + "grad_norm": 0.6311177015304565, + "learning_rate": 1.9772483791898547e-05, + "loss": 0.138946533203125, + "step": 6182 + }, + { + "epoch": 0.4179397052859267, + "grad_norm": 1.040690302848816, + "learning_rate": 1.976935560586383e-05, + "loss": 0.1996917724609375, + "step": 6183 + }, + { + "epoch": 0.4180073002568609, + "grad_norm": 0.9529773592948914, + "learning_rate": 1.976622718905803e-05, + "loss": 0.1138763427734375, + "step": 6184 + }, + { + "epoch": 0.41807489522779506, + "grad_norm": 0.7090194225311279, + "learning_rate": 1.9763098541632516e-05, + "loss": 0.12408447265625, + "step": 6185 + }, + { + "epoch": 0.41814249019872923, + "grad_norm": 0.43548113107681274, + "learning_rate": 1.9759969663738672e-05, + "loss": 0.071014404296875, + "step": 6186 + }, + { + "epoch": 0.4182100851696634, + "grad_norm": 0.387123167514801, + "learning_rate": 1.975684055552789e-05, + "loss": 0.06571197509765625, + "step": 6187 + }, + { + "epoch": 0.4182776801405975, + "grad_norm": 0.8146902918815613, + "learning_rate": 1.9753711217151587e-05, + "loss": 0.1747283935546875, + "step": 6188 + }, + { + "epoch": 0.4183452751115317, + "grad_norm": 0.5440382361412048, + "learning_rate": 1.975058164876117e-05, + "loss": 0.104736328125, + "step": 6189 + }, + { + "epoch": 0.41841287008246586, + "grad_norm": 1.2641373872756958, + "learning_rate": 1.9747451850508065e-05, + "loss": 0.202056884765625, + "step": 6190 + }, + { + "epoch": 0.41848046505340003, + "grad_norm": 0.34747621417045593, + "learning_rate": 1.9744321822543725e-05, + "loss": 0.08029937744140625, + "step": 6191 + }, + { + "epoch": 0.4185480600243342, + "grad_norm": 1.0402802228927612, + "learning_rate": 1.9741191565019584e-05, + "loss": 0.131683349609375, + "step": 6192 + }, + { + "epoch": 0.41861565499526837, + "grad_norm": 0.2892255485057831, + "learning_rate": 1.973806107808711e-05, + "loss": 0.06275177001953125, + "step": 6193 + }, + { + "epoch": 0.41868324996620254, + "grad_norm": 0.2623407542705536, + "learning_rate": 1.9734930361897776e-05, + "loss": 0.06320858001708984, + "step": 6194 + }, + { + "epoch": 0.41875084493713666, + "grad_norm": 0.7817108035087585, + "learning_rate": 1.9731799416603062e-05, + "loss": 0.1647186279296875, + "step": 6195 + }, + { + "epoch": 0.4188184399080708, + "grad_norm": 0.9880834817886353, + "learning_rate": 1.9728668242354472e-05, + "loss": 0.12946319580078125, + "step": 6196 + }, + { + "epoch": 0.418886034879005, + "grad_norm": 0.3885834515094757, + "learning_rate": 1.9725536839303502e-05, + "loss": 0.0717926025390625, + "step": 6197 + }, + { + "epoch": 0.41895362984993917, + "grad_norm": 0.7404108643531799, + "learning_rate": 1.972240520760167e-05, + "loss": 0.16015625, + "step": 6198 + }, + { + "epoch": 0.41902122482087334, + "grad_norm": 0.9385894536972046, + "learning_rate": 1.971927334740051e-05, + "loss": 0.21142578125, + "step": 6199 + }, + { + "epoch": 0.4190888197918075, + "grad_norm": 0.25960057973861694, + "learning_rate": 1.971614125885155e-05, + "loss": 0.0399169921875, + "step": 6200 + }, + { + "epoch": 0.4191564147627416, + "grad_norm": 0.6880357265472412, + "learning_rate": 1.9713008942106352e-05, + "loss": 0.113006591796875, + "step": 6201 + }, + { + "epoch": 0.4192240097336758, + "grad_norm": 0.58144611120224, + "learning_rate": 1.970987639731647e-05, + "loss": 0.09263992309570312, + "step": 6202 + }, + { + "epoch": 0.41929160470460997, + "grad_norm": 0.6386545896530151, + "learning_rate": 1.9706743624633476e-05, + "loss": 0.11013031005859375, + "step": 6203 + }, + { + "epoch": 0.41935919967554414, + "grad_norm": 0.7350597977638245, + "learning_rate": 1.9703610624208956e-05, + "loss": 0.1788482666015625, + "step": 6204 + }, + { + "epoch": 0.4194267946464783, + "grad_norm": 0.2784861922264099, + "learning_rate": 1.97004773961945e-05, + "loss": 0.0583953857421875, + "step": 6205 + }, + { + "epoch": 0.4194943896174125, + "grad_norm": 0.6063908338546753, + "learning_rate": 1.969734394074172e-05, + "loss": 0.151397705078125, + "step": 6206 + }, + { + "epoch": 0.41956198458834665, + "grad_norm": 0.16222232580184937, + "learning_rate": 1.9694210258002227e-05, + "loss": 0.02154064178466797, + "step": 6207 + }, + { + "epoch": 0.41962957955928076, + "grad_norm": 0.3178059756755829, + "learning_rate": 1.9691076348127644e-05, + "loss": 0.0655364990234375, + "step": 6208 + }, + { + "epoch": 0.41969717453021493, + "grad_norm": 0.20263487100601196, + "learning_rate": 1.968794221126962e-05, + "loss": 0.02219390869140625, + "step": 6209 + }, + { + "epoch": 0.4197647695011491, + "grad_norm": 1.275024175643921, + "learning_rate": 1.9684807847579796e-05, + "loss": 0.240875244140625, + "step": 6210 + }, + { + "epoch": 0.4198323644720833, + "grad_norm": 0.5512054562568665, + "learning_rate": 1.968167325720983e-05, + "loss": 0.1171875, + "step": 6211 + }, + { + "epoch": 0.41989995944301745, + "grad_norm": 0.8529620170593262, + "learning_rate": 1.96785384403114e-05, + "loss": 0.128173828125, + "step": 6212 + }, + { + "epoch": 0.4199675544139516, + "grad_norm": 0.5736650228500366, + "learning_rate": 1.9675403397036187e-05, + "loss": 0.1015777587890625, + "step": 6213 + }, + { + "epoch": 0.4200351493848858, + "grad_norm": 0.2888832688331604, + "learning_rate": 1.967226812753588e-05, + "loss": 0.037883758544921875, + "step": 6214 + }, + { + "epoch": 0.4201027443558199, + "grad_norm": 0.5763230919837952, + "learning_rate": 1.9669132631962183e-05, + "loss": 0.119842529296875, + "step": 6215 + }, + { + "epoch": 0.42017033932675407, + "grad_norm": 0.2125220000743866, + "learning_rate": 1.966599691046681e-05, + "loss": 0.03720855712890625, + "step": 6216 + }, + { + "epoch": 0.42023793429768824, + "grad_norm": 1.2077970504760742, + "learning_rate": 1.966286096320149e-05, + "loss": 0.12842941284179688, + "step": 6217 + }, + { + "epoch": 0.4203055292686224, + "grad_norm": 0.5663610100746155, + "learning_rate": 1.9659724790317962e-05, + "loss": 0.113494873046875, + "step": 6218 + }, + { + "epoch": 0.4203731242395566, + "grad_norm": 1.9244498014450073, + "learning_rate": 1.965658839196797e-05, + "loss": 0.2523193359375, + "step": 6219 + }, + { + "epoch": 0.42044071921049075, + "grad_norm": 0.40864506363868713, + "learning_rate": 1.9653451768303268e-05, + "loss": 0.05521583557128906, + "step": 6220 + }, + { + "epoch": 0.4205083141814249, + "grad_norm": 0.6461116075515747, + "learning_rate": 1.9650314919475632e-05, + "loss": 0.1357879638671875, + "step": 6221 + }, + { + "epoch": 0.42057590915235904, + "grad_norm": 1.0890032052993774, + "learning_rate": 1.964717784563684e-05, + "loss": 0.12664794921875, + "step": 6222 + }, + { + "epoch": 0.4206435041232932, + "grad_norm": 1.0526448488235474, + "learning_rate": 1.9644040546938688e-05, + "loss": 0.15187454223632812, + "step": 6223 + }, + { + "epoch": 0.4207110990942274, + "grad_norm": 0.5536701679229736, + "learning_rate": 1.964090302353297e-05, + "loss": 0.1289215087890625, + "step": 6224 + }, + { + "epoch": 0.42077869406516155, + "grad_norm": 0.6468313336372375, + "learning_rate": 1.96377652755715e-05, + "loss": 0.12450408935546875, + "step": 6225 + }, + { + "epoch": 0.4208462890360957, + "grad_norm": 1.0341215133666992, + "learning_rate": 1.963462730320611e-05, + "loss": 0.15348243713378906, + "step": 6226 + }, + { + "epoch": 0.4209138840070299, + "grad_norm": 0.8356725573539734, + "learning_rate": 1.9631489106588624e-05, + "loss": 0.139923095703125, + "step": 6227 + }, + { + "epoch": 0.42098147897796406, + "grad_norm": 0.32330042123794556, + "learning_rate": 1.962835068587089e-05, + "loss": 0.047328948974609375, + "step": 6228 + }, + { + "epoch": 0.4210490739488982, + "grad_norm": 0.4135657548904419, + "learning_rate": 1.9625212041204767e-05, + "loss": 0.0696868896484375, + "step": 6229 + }, + { + "epoch": 0.42111666891983235, + "grad_norm": 0.6209238171577454, + "learning_rate": 1.9622073172742126e-05, + "loss": 0.0829315185546875, + "step": 6230 + }, + { + "epoch": 0.4211842638907665, + "grad_norm": 0.2993427515029907, + "learning_rate": 1.961893408063484e-05, + "loss": 0.06165313720703125, + "step": 6231 + }, + { + "epoch": 0.4212518588617007, + "grad_norm": 0.7723501324653625, + "learning_rate": 1.9615794765034794e-05, + "loss": 0.166839599609375, + "step": 6232 + }, + { + "epoch": 0.42131945383263486, + "grad_norm": 0.290597528219223, + "learning_rate": 1.9612655226093893e-05, + "loss": 0.024730682373046875, + "step": 6233 + }, + { + "epoch": 0.42138704880356903, + "grad_norm": 0.6665354371070862, + "learning_rate": 1.9609515463964052e-05, + "loss": 0.06885147094726562, + "step": 6234 + }, + { + "epoch": 0.4214546437745032, + "grad_norm": 0.4002794623374939, + "learning_rate": 1.9606375478797185e-05, + "loss": 0.062488555908203125, + "step": 6235 + }, + { + "epoch": 0.4215222387454373, + "grad_norm": 0.4446045756340027, + "learning_rate": 1.960323527074522e-05, + "loss": 0.0740509033203125, + "step": 6236 + }, + { + "epoch": 0.4215898337163715, + "grad_norm": 0.6151390075683594, + "learning_rate": 1.960009483996011e-05, + "loss": 0.106353759765625, + "step": 6237 + }, + { + "epoch": 0.42165742868730566, + "grad_norm": 0.5300898551940918, + "learning_rate": 1.9596954186593802e-05, + "loss": 0.1204986572265625, + "step": 6238 + }, + { + "epoch": 0.42172502365823983, + "grad_norm": 0.2983716130256653, + "learning_rate": 1.9593813310798263e-05, + "loss": 0.050868988037109375, + "step": 6239 + }, + { + "epoch": 0.421792618629174, + "grad_norm": 0.3852081000804901, + "learning_rate": 1.959067221272547e-05, + "loss": 0.0568389892578125, + "step": 6240 + }, + { + "epoch": 0.42186021360010817, + "grad_norm": 1.6125764846801758, + "learning_rate": 1.95875308925274e-05, + "loss": 0.2097015380859375, + "step": 6241 + }, + { + "epoch": 0.42192780857104234, + "grad_norm": 0.47856178879737854, + "learning_rate": 1.9584389350356058e-05, + "loss": 0.10796737670898438, + "step": 6242 + }, + { + "epoch": 0.42199540354197645, + "grad_norm": 0.8109943866729736, + "learning_rate": 1.958124758636345e-05, + "loss": 0.13988494873046875, + "step": 6243 + }, + { + "epoch": 0.4220629985129106, + "grad_norm": 0.46235188841819763, + "learning_rate": 1.9578105600701595e-05, + "loss": 0.0769195556640625, + "step": 6244 + }, + { + "epoch": 0.4221305934838448, + "grad_norm": 0.3621702492237091, + "learning_rate": 1.957496339352252e-05, + "loss": 0.061550140380859375, + "step": 6245 + }, + { + "epoch": 0.42219818845477897, + "grad_norm": 0.7809349894523621, + "learning_rate": 1.9571820964978263e-05, + "loss": 0.13226699829101562, + "step": 6246 + }, + { + "epoch": 0.42226578342571314, + "grad_norm": 0.7440240383148193, + "learning_rate": 1.9568678315220876e-05, + "loss": 0.144683837890625, + "step": 6247 + }, + { + "epoch": 0.4223333783966473, + "grad_norm": 0.3040701448917389, + "learning_rate": 1.956553544440242e-05, + "loss": 0.0687255859375, + "step": 6248 + }, + { + "epoch": 0.4224009733675815, + "grad_norm": 0.6287825703620911, + "learning_rate": 1.9562392352674967e-05, + "loss": 0.195404052734375, + "step": 6249 + }, + { + "epoch": 0.4224685683385156, + "grad_norm": 1.4880174398422241, + "learning_rate": 1.9559249040190597e-05, + "loss": 0.227294921875, + "step": 6250 + }, + { + "epoch": 0.42253616330944976, + "grad_norm": 0.4158921241760254, + "learning_rate": 1.9556105507101408e-05, + "loss": 0.0719757080078125, + "step": 6251 + }, + { + "epoch": 0.42260375828038393, + "grad_norm": 0.5703428983688354, + "learning_rate": 1.9552961753559498e-05, + "loss": 0.12833023071289062, + "step": 6252 + }, + { + "epoch": 0.4226713532513181, + "grad_norm": 1.8640938997268677, + "learning_rate": 1.9549817779716986e-05, + "loss": 0.29400634765625, + "step": 6253 + }, + { + "epoch": 0.4227389482222523, + "grad_norm": 0.65013587474823, + "learning_rate": 1.954667358572599e-05, + "loss": 0.1483612060546875, + "step": 6254 + }, + { + "epoch": 0.42280654319318645, + "grad_norm": 0.7937663793563843, + "learning_rate": 1.9543529171738654e-05, + "loss": 0.137542724609375, + "step": 6255 + }, + { + "epoch": 0.4228741381641206, + "grad_norm": 1.0073106288909912, + "learning_rate": 1.954038453790712e-05, + "loss": 0.237884521484375, + "step": 6256 + }, + { + "epoch": 0.42294173313505473, + "grad_norm": 0.5907875895500183, + "learning_rate": 1.9537239684383546e-05, + "loss": 0.137420654296875, + "step": 6257 + }, + { + "epoch": 0.4230093281059889, + "grad_norm": 0.7803759574890137, + "learning_rate": 1.9534094611320104e-05, + "loss": 0.14875030517578125, + "step": 6258 + }, + { + "epoch": 0.4230769230769231, + "grad_norm": 0.5169307589530945, + "learning_rate": 1.9530949318868962e-05, + "loss": 0.09827423095703125, + "step": 6259 + }, + { + "epoch": 0.42314451804785724, + "grad_norm": 0.2982105314731598, + "learning_rate": 1.9527803807182315e-05, + "loss": 0.0585479736328125, + "step": 6260 + }, + { + "epoch": 0.4232121130187914, + "grad_norm": 0.9850400686264038, + "learning_rate": 1.952465807641236e-05, + "loss": 0.15435791015625, + "step": 6261 + }, + { + "epoch": 0.4232797079897256, + "grad_norm": 0.5801507234573364, + "learning_rate": 1.952151212671131e-05, + "loss": 0.1478118896484375, + "step": 6262 + }, + { + "epoch": 0.4233473029606597, + "grad_norm": 0.25151440501213074, + "learning_rate": 1.9518365958231385e-05, + "loss": 0.041919708251953125, + "step": 6263 + }, + { + "epoch": 0.42341489793159387, + "grad_norm": 1.106690526008606, + "learning_rate": 1.9515219571124817e-05, + "loss": 0.13510894775390625, + "step": 6264 + }, + { + "epoch": 0.42348249290252804, + "grad_norm": 0.8928744196891785, + "learning_rate": 1.9512072965543843e-05, + "loss": 0.08618545532226562, + "step": 6265 + }, + { + "epoch": 0.4235500878734622, + "grad_norm": 0.77625972032547, + "learning_rate": 1.9508926141640717e-05, + "loss": 0.11297607421875, + "step": 6266 + }, + { + "epoch": 0.4236176828443964, + "grad_norm": 1.3966301679611206, + "learning_rate": 1.950577909956771e-05, + "loss": 0.212646484375, + "step": 6267 + }, + { + "epoch": 0.42368527781533055, + "grad_norm": 0.7910467982292175, + "learning_rate": 1.9502631839477085e-05, + "loss": 0.11113739013671875, + "step": 6268 + }, + { + "epoch": 0.4237528727862647, + "grad_norm": 1.065030574798584, + "learning_rate": 1.9499484361521134e-05, + "loss": 0.1479949951171875, + "step": 6269 + }, + { + "epoch": 0.42382046775719884, + "grad_norm": 1.0160284042358398, + "learning_rate": 1.9496336665852146e-05, + "loss": 0.149993896484375, + "step": 6270 + }, + { + "epoch": 0.423888062728133, + "grad_norm": 0.3701017200946808, + "learning_rate": 1.9493188752622423e-05, + "loss": 0.0666351318359375, + "step": 6271 + }, + { + "epoch": 0.4239556576990672, + "grad_norm": 0.6529484391212463, + "learning_rate": 1.9490040621984293e-05, + "loss": 0.120208740234375, + "step": 6272 + }, + { + "epoch": 0.42402325267000135, + "grad_norm": 0.6471588611602783, + "learning_rate": 1.9486892274090066e-05, + "loss": 0.09074020385742188, + "step": 6273 + }, + { + "epoch": 0.4240908476409355, + "grad_norm": 1.031603217124939, + "learning_rate": 1.948374370909209e-05, + "loss": 0.16705322265625, + "step": 6274 + }, + { + "epoch": 0.4241584426118697, + "grad_norm": 0.22383257746696472, + "learning_rate": 1.9480594927142713e-05, + "loss": 0.03040313720703125, + "step": 6275 + }, + { + "epoch": 0.42422603758280386, + "grad_norm": 0.6439732313156128, + "learning_rate": 1.9477445928394284e-05, + "loss": 0.100128173828125, + "step": 6276 + }, + { + "epoch": 0.424293632553738, + "grad_norm": 0.7080827951431274, + "learning_rate": 1.9474296712999182e-05, + "loss": 0.1372528076171875, + "step": 6277 + }, + { + "epoch": 0.42436122752467215, + "grad_norm": 0.63993239402771, + "learning_rate": 1.9471147281109777e-05, + "loss": 0.1234588623046875, + "step": 6278 + }, + { + "epoch": 0.4244288224956063, + "grad_norm": 0.6201884746551514, + "learning_rate": 1.9467997632878457e-05, + "loss": 0.11902618408203125, + "step": 6279 + }, + { + "epoch": 0.4244964174665405, + "grad_norm": 0.22496578097343445, + "learning_rate": 1.9464847768457627e-05, + "loss": 0.0441436767578125, + "step": 6280 + }, + { + "epoch": 0.42456401243747466, + "grad_norm": 0.8938481211662292, + "learning_rate": 1.94616976879997e-05, + "loss": 0.13885498046875, + "step": 6281 + }, + { + "epoch": 0.42463160740840883, + "grad_norm": 1.3046764135360718, + "learning_rate": 1.9458547391657083e-05, + "loss": 0.2234649658203125, + "step": 6282 + }, + { + "epoch": 0.424699202379343, + "grad_norm": 0.7908055782318115, + "learning_rate": 1.9455396879582224e-05, + "loss": 0.136474609375, + "step": 6283 + }, + { + "epoch": 0.4247667973502771, + "grad_norm": 1.5180412530899048, + "learning_rate": 1.9452246151927555e-05, + "loss": 0.251953125, + "step": 6284 + }, + { + "epoch": 0.4248343923212113, + "grad_norm": 0.590580403804779, + "learning_rate": 1.9449095208845527e-05, + "loss": 0.10439300537109375, + "step": 6285 + }, + { + "epoch": 0.42490198729214546, + "grad_norm": 0.32282543182373047, + "learning_rate": 1.9445944050488605e-05, + "loss": 0.072601318359375, + "step": 6286 + }, + { + "epoch": 0.4249695822630796, + "grad_norm": 0.7678634524345398, + "learning_rate": 1.9442792677009255e-05, + "loss": 0.14786529541015625, + "step": 6287 + }, + { + "epoch": 0.4250371772340138, + "grad_norm": 0.42964670062065125, + "learning_rate": 1.9439641088559976e-05, + "loss": 0.093597412109375, + "step": 6288 + }, + { + "epoch": 0.42510477220494797, + "grad_norm": 0.7213066816329956, + "learning_rate": 1.9436489285293246e-05, + "loss": 0.1183319091796875, + "step": 6289 + }, + { + "epoch": 0.42517236717588214, + "grad_norm": 1.0161583423614502, + "learning_rate": 1.9433337267361573e-05, + "loss": 0.17401123046875, + "step": 6290 + }, + { + "epoch": 0.42523996214681625, + "grad_norm": 0.30250826478004456, + "learning_rate": 1.9430185034917474e-05, + "loss": 0.053310394287109375, + "step": 6291 + }, + { + "epoch": 0.4253075571177504, + "grad_norm": 0.2957776188850403, + "learning_rate": 1.942703258811347e-05, + "loss": 0.05791473388671875, + "step": 6292 + }, + { + "epoch": 0.4253751520886846, + "grad_norm": 0.44216910004615784, + "learning_rate": 1.9423879927102106e-05, + "loss": 0.09149169921875, + "step": 6293 + }, + { + "epoch": 0.42544274705961876, + "grad_norm": 0.513940155506134, + "learning_rate": 1.9420727052035913e-05, + "loss": 0.104095458984375, + "step": 6294 + }, + { + "epoch": 0.42551034203055293, + "grad_norm": 1.0028002262115479, + "learning_rate": 1.9417573963067447e-05, + "loss": 0.13432693481445312, + "step": 6295 + }, + { + "epoch": 0.4255779370014871, + "grad_norm": 0.8696045875549316, + "learning_rate": 1.9414420660349288e-05, + "loss": 0.12660980224609375, + "step": 6296 + }, + { + "epoch": 0.4256455319724213, + "grad_norm": 0.5398858785629272, + "learning_rate": 1.9411267144034004e-05, + "loss": 0.10199737548828125, + "step": 6297 + }, + { + "epoch": 0.4257131269433554, + "grad_norm": 2.168072462081909, + "learning_rate": 1.940811341427418e-05, + "loss": 0.204925537109375, + "step": 6298 + }, + { + "epoch": 0.42578072191428956, + "grad_norm": 0.2231971174478531, + "learning_rate": 1.940495947122242e-05, + "loss": 0.031536102294921875, + "step": 6299 + }, + { + "epoch": 0.42584831688522373, + "grad_norm": 0.7277951836585999, + "learning_rate": 1.9401805315031318e-05, + "loss": 0.0796051025390625, + "step": 6300 + }, + { + "epoch": 0.4259159118561579, + "grad_norm": 1.1556023359298706, + "learning_rate": 1.9398650945853504e-05, + "loss": 0.14015960693359375, + "step": 6301 + }, + { + "epoch": 0.4259835068270921, + "grad_norm": 0.5988995432853699, + "learning_rate": 1.9395496363841607e-05, + "loss": 0.1149444580078125, + "step": 6302 + }, + { + "epoch": 0.42605110179802624, + "grad_norm": 0.45298144221305847, + "learning_rate": 1.9392341569148254e-05, + "loss": 0.07592010498046875, + "step": 6303 + }, + { + "epoch": 0.4261186967689604, + "grad_norm": 0.4645782709121704, + "learning_rate": 1.9389186561926105e-05, + "loss": 0.0989990234375, + "step": 6304 + }, + { + "epoch": 0.42618629173989453, + "grad_norm": 1.0950400829315186, + "learning_rate": 1.9386031342327812e-05, + "loss": 0.1803741455078125, + "step": 6305 + }, + { + "epoch": 0.4262538867108287, + "grad_norm": 0.5430124402046204, + "learning_rate": 1.938287591050605e-05, + "loss": 0.1019439697265625, + "step": 6306 + }, + { + "epoch": 0.42632148168176287, + "grad_norm": 0.9150962233543396, + "learning_rate": 1.937972026661349e-05, + "loss": 0.1502532958984375, + "step": 6307 + }, + { + "epoch": 0.42638907665269704, + "grad_norm": 1.0054421424865723, + "learning_rate": 1.9376564410802826e-05, + "loss": 0.13005828857421875, + "step": 6308 + }, + { + "epoch": 0.4264566716236312, + "grad_norm": 1.1048015356063843, + "learning_rate": 1.937340834322676e-05, + "loss": 0.1650390625, + "step": 6309 + }, + { + "epoch": 0.4265242665945654, + "grad_norm": 0.22967016696929932, + "learning_rate": 1.9370252064038e-05, + "loss": 0.040374755859375, + "step": 6310 + }, + { + "epoch": 0.42659186156549955, + "grad_norm": 0.44645392894744873, + "learning_rate": 1.936709557338927e-05, + "loss": 0.07281494140625, + "step": 6311 + }, + { + "epoch": 0.42665945653643367, + "grad_norm": 0.46788474917411804, + "learning_rate": 1.936393887143329e-05, + "loss": 0.05928802490234375, + "step": 6312 + }, + { + "epoch": 0.42672705150736784, + "grad_norm": 0.9457079172134399, + "learning_rate": 1.9360781958322815e-05, + "loss": 0.1682586669921875, + "step": 6313 + }, + { + "epoch": 0.426794646478302, + "grad_norm": 0.4130497872829437, + "learning_rate": 1.9357624834210584e-05, + "loss": 0.051807403564453125, + "step": 6314 + }, + { + "epoch": 0.4268622414492362, + "grad_norm": 0.4277002811431885, + "learning_rate": 1.935446749924937e-05, + "loss": 0.07070159912109375, + "step": 6315 + }, + { + "epoch": 0.42692983642017035, + "grad_norm": 0.9390903115272522, + "learning_rate": 1.935130995359194e-05, + "loss": 0.1283721923828125, + "step": 6316 + }, + { + "epoch": 0.4269974313911045, + "grad_norm": 0.8253328800201416, + "learning_rate": 1.9348152197391065e-05, + "loss": 0.13177490234375, + "step": 6317 + }, + { + "epoch": 0.4270650263620387, + "grad_norm": 0.35806894302368164, + "learning_rate": 1.934499423079955e-05, + "loss": 0.07085418701171875, + "step": 6318 + }, + { + "epoch": 0.4271326213329728, + "grad_norm": 0.35984504222869873, + "learning_rate": 1.9341836053970195e-05, + "loss": 0.060367584228515625, + "step": 6319 + }, + { + "epoch": 0.427200216303907, + "grad_norm": 0.8669466376304626, + "learning_rate": 1.9338677667055806e-05, + "loss": 0.181243896484375, + "step": 6320 + }, + { + "epoch": 0.42726781127484115, + "grad_norm": 1.3009997606277466, + "learning_rate": 1.9335519070209213e-05, + "loss": 0.134033203125, + "step": 6321 + }, + { + "epoch": 0.4273354062457753, + "grad_norm": 0.49336865544319153, + "learning_rate": 1.933236026358324e-05, + "loss": 0.10877227783203125, + "step": 6322 + }, + { + "epoch": 0.4274030012167095, + "grad_norm": 0.9562320113182068, + "learning_rate": 1.9329201247330745e-05, + "loss": 0.1149139404296875, + "step": 6323 + }, + { + "epoch": 0.42747059618764366, + "grad_norm": 0.37029939889907837, + "learning_rate": 1.932604202160456e-05, + "loss": 0.0694732666015625, + "step": 6324 + }, + { + "epoch": 0.42753819115857783, + "grad_norm": 0.7902434468269348, + "learning_rate": 1.932288258655756e-05, + "loss": 0.091583251953125, + "step": 6325 + }, + { + "epoch": 0.42760578612951194, + "grad_norm": 1.7955548763275146, + "learning_rate": 1.931972294234262e-05, + "loss": 0.225341796875, + "step": 6326 + }, + { + "epoch": 0.4276733811004461, + "grad_norm": 0.3261522650718689, + "learning_rate": 1.9316563089112622e-05, + "loss": 0.059478759765625, + "step": 6327 + }, + { + "epoch": 0.4277409760713803, + "grad_norm": 0.5872281193733215, + "learning_rate": 1.9313403027020458e-05, + "loss": 0.108062744140625, + "step": 6328 + }, + { + "epoch": 0.42780857104231446, + "grad_norm": 0.4856436550617218, + "learning_rate": 1.9310242756219027e-05, + "loss": 0.099273681640625, + "step": 6329 + }, + { + "epoch": 0.4278761660132486, + "grad_norm": 0.3664376437664032, + "learning_rate": 1.9307082276861244e-05, + "loss": 0.05112457275390625, + "step": 6330 + }, + { + "epoch": 0.4279437609841828, + "grad_norm": 0.2892214059829712, + "learning_rate": 1.9303921589100038e-05, + "loss": 0.050994873046875, + "step": 6331 + }, + { + "epoch": 0.4280113559551169, + "grad_norm": 0.5347984433174133, + "learning_rate": 1.9300760693088338e-05, + "loss": 0.130462646484375, + "step": 6332 + }, + { + "epoch": 0.4280789509260511, + "grad_norm": 0.15901321172714233, + "learning_rate": 1.9297599588979095e-05, + "loss": 0.039703369140625, + "step": 6333 + }, + { + "epoch": 0.42814654589698525, + "grad_norm": 0.6456589698791504, + "learning_rate": 1.9294438276925252e-05, + "loss": 0.11594390869140625, + "step": 6334 + }, + { + "epoch": 0.4282141408679194, + "grad_norm": 1.1737899780273438, + "learning_rate": 1.929127675707978e-05, + "loss": 0.1424407958984375, + "step": 6335 + }, + { + "epoch": 0.4282817358388536, + "grad_norm": 0.8315610289573669, + "learning_rate": 1.928811502959565e-05, + "loss": 0.1381378173828125, + "step": 6336 + }, + { + "epoch": 0.42834933080978776, + "grad_norm": 0.450644314289093, + "learning_rate": 1.928495309462585e-05, + "loss": 0.08272552490234375, + "step": 6337 + }, + { + "epoch": 0.42841692578072194, + "grad_norm": 0.3386661410331726, + "learning_rate": 1.9281790952323366e-05, + "loss": 0.06412506103515625, + "step": 6338 + }, + { + "epoch": 0.42848452075165605, + "grad_norm": 1.143170714378357, + "learning_rate": 1.9278628602841214e-05, + "loss": 0.17879867553710938, + "step": 6339 + }, + { + "epoch": 0.4285521157225902, + "grad_norm": 0.7596081495285034, + "learning_rate": 1.9275466046332403e-05, + "loss": 0.14776611328125, + "step": 6340 + }, + { + "epoch": 0.4286197106935244, + "grad_norm": 0.5099322199821472, + "learning_rate": 1.927230328294995e-05, + "loss": 0.104949951171875, + "step": 6341 + }, + { + "epoch": 0.42868730566445856, + "grad_norm": 0.318671852350235, + "learning_rate": 1.9269140312846897e-05, + "loss": 0.0766754150390625, + "step": 6342 + }, + { + "epoch": 0.42875490063539273, + "grad_norm": 0.655720055103302, + "learning_rate": 1.9265977136176286e-05, + "loss": 0.12174224853515625, + "step": 6343 + }, + { + "epoch": 0.4288224956063269, + "grad_norm": 1.1100682020187378, + "learning_rate": 1.9262813753091173e-05, + "loss": 0.264312744140625, + "step": 6344 + }, + { + "epoch": 0.4288900905772611, + "grad_norm": 0.28618308901786804, + "learning_rate": 1.925965016374462e-05, + "loss": 0.044315338134765625, + "step": 6345 + }, + { + "epoch": 0.4289576855481952, + "grad_norm": 0.28707754611968994, + "learning_rate": 1.9256486368289698e-05, + "loss": 0.04489898681640625, + "step": 6346 + }, + { + "epoch": 0.42902528051912936, + "grad_norm": 0.2623273730278015, + "learning_rate": 1.9253322366879495e-05, + "loss": 0.05646514892578125, + "step": 6347 + }, + { + "epoch": 0.42909287549006353, + "grad_norm": 0.3316405713558197, + "learning_rate": 1.925015815966711e-05, + "loss": 0.07353973388671875, + "step": 6348 + }, + { + "epoch": 0.4291604704609977, + "grad_norm": 0.19416841864585876, + "learning_rate": 1.924699374680564e-05, + "loss": 0.03543663024902344, + "step": 6349 + }, + { + "epoch": 0.42922806543193187, + "grad_norm": 1.1079589128494263, + "learning_rate": 1.9243829128448196e-05, + "loss": 0.21527099609375, + "step": 6350 + }, + { + "epoch": 0.42929566040286604, + "grad_norm": 0.7254719734191895, + "learning_rate": 1.9240664304747914e-05, + "loss": 0.1590576171875, + "step": 6351 + }, + { + "epoch": 0.4293632553738002, + "grad_norm": 0.7365336418151855, + "learning_rate": 1.9237499275857917e-05, + "loss": 0.12808990478515625, + "step": 6352 + }, + { + "epoch": 0.4294308503447343, + "grad_norm": 0.5496067404747009, + "learning_rate": 1.9234334041931352e-05, + "loss": 0.12810516357421875, + "step": 6353 + }, + { + "epoch": 0.4294984453156685, + "grad_norm": 0.4856751263141632, + "learning_rate": 1.9231168603121373e-05, + "loss": 0.083343505859375, + "step": 6354 + }, + { + "epoch": 0.42956604028660267, + "grad_norm": 0.781173825263977, + "learning_rate": 1.922800295958114e-05, + "loss": 0.1673736572265625, + "step": 6355 + }, + { + "epoch": 0.42963363525753684, + "grad_norm": 0.24008873105049133, + "learning_rate": 1.9224837111463834e-05, + "loss": 0.03197479248046875, + "step": 6356 + }, + { + "epoch": 0.429701230228471, + "grad_norm": 0.6691139340400696, + "learning_rate": 1.9221671058922637e-05, + "loss": 0.1048126220703125, + "step": 6357 + }, + { + "epoch": 0.4297688251994052, + "grad_norm": 0.5612442493438721, + "learning_rate": 1.9218504802110734e-05, + "loss": 0.11925506591796875, + "step": 6358 + }, + { + "epoch": 0.42983642017033935, + "grad_norm": 0.23368534445762634, + "learning_rate": 1.921533834118134e-05, + "loss": 0.040142059326171875, + "step": 6359 + }, + { + "epoch": 0.42990401514127347, + "grad_norm": 0.3241831660270691, + "learning_rate": 1.9212171676287662e-05, + "loss": 0.054347991943359375, + "step": 6360 + }, + { + "epoch": 0.42997161011220764, + "grad_norm": 0.564415693283081, + "learning_rate": 1.920900480758292e-05, + "loss": 0.09832382202148438, + "step": 6361 + }, + { + "epoch": 0.4300392050831418, + "grad_norm": 0.6019802093505859, + "learning_rate": 1.920583773522035e-05, + "loss": 0.1368408203125, + "step": 6362 + }, + { + "epoch": 0.430106800054076, + "grad_norm": 0.5147311687469482, + "learning_rate": 1.9202670459353197e-05, + "loss": 0.08107757568359375, + "step": 6363 + }, + { + "epoch": 0.43017439502501015, + "grad_norm": 0.5944850444793701, + "learning_rate": 1.919950298013471e-05, + "loss": 0.13372802734375, + "step": 6364 + }, + { + "epoch": 0.4302419899959443, + "grad_norm": 0.21618865430355072, + "learning_rate": 1.9196335297718153e-05, + "loss": 0.038982391357421875, + "step": 6365 + }, + { + "epoch": 0.4303095849668785, + "grad_norm": 0.45112714171409607, + "learning_rate": 1.919316741225679e-05, + "loss": 0.076934814453125, + "step": 6366 + }, + { + "epoch": 0.4303771799378126, + "grad_norm": 0.28899291157722473, + "learning_rate": 1.918999932390392e-05, + "loss": 0.0587615966796875, + "step": 6367 + }, + { + "epoch": 0.4304447749087468, + "grad_norm": 0.4045732915401459, + "learning_rate": 1.9186831032812822e-05, + "loss": 0.06966400146484375, + "step": 6368 + }, + { + "epoch": 0.43051236987968095, + "grad_norm": 0.2865433990955353, + "learning_rate": 1.91836625391368e-05, + "loss": 0.025389671325683594, + "step": 6369 + }, + { + "epoch": 0.4305799648506151, + "grad_norm": 0.9847175478935242, + "learning_rate": 1.918049384302917e-05, + "loss": 0.1927032470703125, + "step": 6370 + }, + { + "epoch": 0.4306475598215493, + "grad_norm": 0.9164602756500244, + "learning_rate": 1.9177324944643244e-05, + "loss": 0.1470775604248047, + "step": 6371 + }, + { + "epoch": 0.43071515479248346, + "grad_norm": 0.2513956129550934, + "learning_rate": 1.917415584413236e-05, + "loss": 0.033824920654296875, + "step": 6372 + }, + { + "epoch": 0.4307827497634176, + "grad_norm": 0.43760791420936584, + "learning_rate": 1.917098654164986e-05, + "loss": 0.094482421875, + "step": 6373 + }, + { + "epoch": 0.43085034473435174, + "grad_norm": 0.7173967361450195, + "learning_rate": 1.9167817037349094e-05, + "loss": 0.08283615112304688, + "step": 6374 + }, + { + "epoch": 0.4309179397052859, + "grad_norm": 0.7090446352958679, + "learning_rate": 1.9164647331383417e-05, + "loss": 0.09177017211914062, + "step": 6375 + }, + { + "epoch": 0.4309855346762201, + "grad_norm": 0.9203032851219177, + "learning_rate": 1.9161477423906203e-05, + "loss": 0.1473541259765625, + "step": 6376 + }, + { + "epoch": 0.43105312964715425, + "grad_norm": 0.744821310043335, + "learning_rate": 1.9158307315070834e-05, + "loss": 0.08785247802734375, + "step": 6377 + }, + { + "epoch": 0.4311207246180884, + "grad_norm": 0.5631360411643982, + "learning_rate": 1.9155137005030695e-05, + "loss": 0.118133544921875, + "step": 6378 + }, + { + "epoch": 0.4311883195890226, + "grad_norm": 1.0161622762680054, + "learning_rate": 1.9151966493939185e-05, + "loss": 0.1610260009765625, + "step": 6379 + }, + { + "epoch": 0.43125591455995677, + "grad_norm": 0.38742804527282715, + "learning_rate": 1.9148795781949716e-05, + "loss": 0.0831146240234375, + "step": 6380 + }, + { + "epoch": 0.4313235095308909, + "grad_norm": 0.6179527640342712, + "learning_rate": 1.9145624869215708e-05, + "loss": 0.113067626953125, + "step": 6381 + }, + { + "epoch": 0.43139110450182505, + "grad_norm": 0.3101397752761841, + "learning_rate": 1.9142453755890586e-05, + "loss": 0.03484344482421875, + "step": 6382 + }, + { + "epoch": 0.4314586994727592, + "grad_norm": 0.7116536498069763, + "learning_rate": 1.913928244212779e-05, + "loss": 0.154754638671875, + "step": 6383 + }, + { + "epoch": 0.4315262944436934, + "grad_norm": 0.8201739192008972, + "learning_rate": 1.9136110928080765e-05, + "loss": 0.16424560546875, + "step": 6384 + }, + { + "epoch": 0.43159388941462756, + "grad_norm": 0.3546072840690613, + "learning_rate": 1.9132939213902973e-05, + "loss": 0.06073760986328125, + "step": 6385 + }, + { + "epoch": 0.43166148438556173, + "grad_norm": 1.4967901706695557, + "learning_rate": 1.9129767299747885e-05, + "loss": 0.217315673828125, + "step": 6386 + }, + { + "epoch": 0.4317290793564959, + "grad_norm": 0.3784692585468292, + "learning_rate": 1.9126595185768963e-05, + "loss": 0.06517791748046875, + "step": 6387 + }, + { + "epoch": 0.43179667432743, + "grad_norm": 0.49778854846954346, + "learning_rate": 1.9123422872119706e-05, + "loss": 0.09993743896484375, + "step": 6388 + }, + { + "epoch": 0.4318642692983642, + "grad_norm": 0.5808599591255188, + "learning_rate": 1.9120250358953607e-05, + "loss": 0.09112548828125, + "step": 6389 + }, + { + "epoch": 0.43193186426929836, + "grad_norm": 0.31558579206466675, + "learning_rate": 1.9117077646424175e-05, + "loss": 0.03570556640625, + "step": 6390 + }, + { + "epoch": 0.43199945924023253, + "grad_norm": 0.2623380422592163, + "learning_rate": 1.9113904734684924e-05, + "loss": 0.04857635498046875, + "step": 6391 + }, + { + "epoch": 0.4320670542111667, + "grad_norm": 1.261206030845642, + "learning_rate": 1.9110731623889373e-05, + "loss": 0.170318603515625, + "step": 6392 + }, + { + "epoch": 0.43213464918210087, + "grad_norm": 0.7425710558891296, + "learning_rate": 1.910755831419106e-05, + "loss": 0.130279541015625, + "step": 6393 + }, + { + "epoch": 0.432202244153035, + "grad_norm": 0.4261048436164856, + "learning_rate": 1.910438480574354e-05, + "loss": 0.065582275390625, + "step": 6394 + }, + { + "epoch": 0.43226983912396916, + "grad_norm": 0.26420319080352783, + "learning_rate": 1.9101211098700348e-05, + "loss": 0.044490814208984375, + "step": 6395 + }, + { + "epoch": 0.43233743409490333, + "grad_norm": 0.24997486174106598, + "learning_rate": 1.9098037193215063e-05, + "loss": 0.04624176025390625, + "step": 6396 + }, + { + "epoch": 0.4324050290658375, + "grad_norm": 0.8936746120452881, + "learning_rate": 1.9094863089441253e-05, + "loss": 0.12531280517578125, + "step": 6397 + }, + { + "epoch": 0.43247262403677167, + "grad_norm": 0.7762422561645508, + "learning_rate": 1.9091688787532504e-05, + "loss": 0.214111328125, + "step": 6398 + }, + { + "epoch": 0.43254021900770584, + "grad_norm": 0.30762869119644165, + "learning_rate": 1.90885142876424e-05, + "loss": 0.07537078857421875, + "step": 6399 + }, + { + "epoch": 0.43260781397864, + "grad_norm": 1.5539971590042114, + "learning_rate": 1.908533958992455e-05, + "loss": 0.2114410400390625, + "step": 6400 + }, + { + "epoch": 0.4326754089495741, + "grad_norm": 0.3869558870792389, + "learning_rate": 1.9082164694532566e-05, + "loss": 0.08473968505859375, + "step": 6401 + }, + { + "epoch": 0.4327430039205083, + "grad_norm": 1.9174268245697021, + "learning_rate": 1.9078989601620067e-05, + "loss": 0.1471729278564453, + "step": 6402 + }, + { + "epoch": 0.43281059889144247, + "grad_norm": 0.2929462492465973, + "learning_rate": 1.9075814311340685e-05, + "loss": 0.042842864990234375, + "step": 6403 + }, + { + "epoch": 0.43287819386237664, + "grad_norm": 0.653995931148529, + "learning_rate": 1.9072638823848055e-05, + "loss": 0.08870697021484375, + "step": 6404 + }, + { + "epoch": 0.4329457888333108, + "grad_norm": 0.8918347954750061, + "learning_rate": 1.906946313929583e-05, + "loss": 0.12562179565429688, + "step": 6405 + }, + { + "epoch": 0.433013383804245, + "grad_norm": 0.4037688374519348, + "learning_rate": 1.9066287257837678e-05, + "loss": 0.06351852416992188, + "step": 6406 + }, + { + "epoch": 0.43308097877517915, + "grad_norm": 0.4601462781429291, + "learning_rate": 1.906311117962726e-05, + "loss": 0.08698654174804688, + "step": 6407 + }, + { + "epoch": 0.43314857374611326, + "grad_norm": 0.9837967157363892, + "learning_rate": 1.905993490481825e-05, + "loss": 0.18399810791015625, + "step": 6408 + }, + { + "epoch": 0.43321616871704743, + "grad_norm": 0.21255306899547577, + "learning_rate": 1.9056758433564343e-05, + "loss": 0.036678314208984375, + "step": 6409 + }, + { + "epoch": 0.4332837636879816, + "grad_norm": 0.27029871940612793, + "learning_rate": 1.9053581766019238e-05, + "loss": 0.0672760009765625, + "step": 6410 + }, + { + "epoch": 0.4333513586589158, + "grad_norm": 0.2934730350971222, + "learning_rate": 1.9050404902336634e-05, + "loss": 0.066162109375, + "step": 6411 + }, + { + "epoch": 0.43341895362984995, + "grad_norm": 0.40039530396461487, + "learning_rate": 1.9047227842670253e-05, + "loss": 0.074066162109375, + "step": 6412 + }, + { + "epoch": 0.4334865486007841, + "grad_norm": 0.8173328638076782, + "learning_rate": 1.904405058717382e-05, + "loss": 0.1510467529296875, + "step": 6413 + }, + { + "epoch": 0.4335541435717183, + "grad_norm": 0.3522838056087494, + "learning_rate": 1.9040873136001066e-05, + "loss": 0.09366989135742188, + "step": 6414 + }, + { + "epoch": 0.4336217385426524, + "grad_norm": 1.5946054458618164, + "learning_rate": 1.903769548930574e-05, + "loss": 0.284332275390625, + "step": 6415 + }, + { + "epoch": 0.4336893335135866, + "grad_norm": 0.26299822330474854, + "learning_rate": 1.90345176472416e-05, + "loss": 0.05121612548828125, + "step": 6416 + }, + { + "epoch": 0.43375692848452074, + "grad_norm": 0.8145299553871155, + "learning_rate": 1.90313396099624e-05, + "loss": 0.1295928955078125, + "step": 6417 + }, + { + "epoch": 0.4338245234554549, + "grad_norm": 0.5893213748931885, + "learning_rate": 1.9028161377621926e-05, + "loss": 0.12401580810546875, + "step": 6418 + }, + { + "epoch": 0.4338921184263891, + "grad_norm": 0.5184214115142822, + "learning_rate": 1.9024982950373954e-05, + "loss": 0.083465576171875, + "step": 6419 + }, + { + "epoch": 0.43395971339732325, + "grad_norm": 0.9756946563720703, + "learning_rate": 1.902180432837227e-05, + "loss": 0.206573486328125, + "step": 6420 + }, + { + "epoch": 0.4340273083682574, + "grad_norm": 0.2573602497577667, + "learning_rate": 1.9018625511770686e-05, + "loss": 0.037616729736328125, + "step": 6421 + }, + { + "epoch": 0.43409490333919154, + "grad_norm": 0.25075867772102356, + "learning_rate": 1.9015446500723005e-05, + "loss": 0.05518341064453125, + "step": 6422 + }, + { + "epoch": 0.4341624983101257, + "grad_norm": 0.9830763936042786, + "learning_rate": 1.9012267295383053e-05, + "loss": 0.12117767333984375, + "step": 6423 + }, + { + "epoch": 0.4342300932810599, + "grad_norm": 0.2308013141155243, + "learning_rate": 1.9009087895904656e-05, + "loss": 0.0295562744140625, + "step": 6424 + }, + { + "epoch": 0.43429768825199405, + "grad_norm": 0.8233740329742432, + "learning_rate": 1.900590830244165e-05, + "loss": 0.1270751953125, + "step": 6425 + }, + { + "epoch": 0.4343652832229282, + "grad_norm": 0.6700736880302429, + "learning_rate": 1.9002728515147895e-05, + "loss": 0.11986541748046875, + "step": 6426 + }, + { + "epoch": 0.4344328781938624, + "grad_norm": 1.0016710758209229, + "learning_rate": 1.899954853417724e-05, + "loss": 0.183807373046875, + "step": 6427 + }, + { + "epoch": 0.43450047316479656, + "grad_norm": 0.29291149973869324, + "learning_rate": 1.899636835968355e-05, + "loss": 0.05648040771484375, + "step": 6428 + }, + { + "epoch": 0.4345680681357307, + "grad_norm": 0.5113793611526489, + "learning_rate": 1.899318799182071e-05, + "loss": 0.07352447509765625, + "step": 6429 + }, + { + "epoch": 0.43463566310666485, + "grad_norm": 0.6886359453201294, + "learning_rate": 1.89900074307426e-05, + "loss": 0.12944793701171875, + "step": 6430 + }, + { + "epoch": 0.434703258077599, + "grad_norm": 1.2215126752853394, + "learning_rate": 1.8986826676603115e-05, + "loss": 0.154876708984375, + "step": 6431 + }, + { + "epoch": 0.4347708530485332, + "grad_norm": 0.3771626949310303, + "learning_rate": 1.8983645729556168e-05, + "loss": 0.06475830078125, + "step": 6432 + }, + { + "epoch": 0.43483844801946736, + "grad_norm": 0.7020666003227234, + "learning_rate": 1.8980464589755662e-05, + "loss": 0.1093902587890625, + "step": 6433 + }, + { + "epoch": 0.43490604299040153, + "grad_norm": 0.3378264009952545, + "learning_rate": 1.8977283257355524e-05, + "loss": 0.06136322021484375, + "step": 6434 + }, + { + "epoch": 0.4349736379613357, + "grad_norm": 0.902926504611969, + "learning_rate": 1.897410173250969e-05, + "loss": 0.15444183349609375, + "step": 6435 + }, + { + "epoch": 0.4350412329322698, + "grad_norm": 0.7664850950241089, + "learning_rate": 1.8970920015372096e-05, + "loss": 0.13829421997070312, + "step": 6436 + }, + { + "epoch": 0.435108827903204, + "grad_norm": 0.7284375429153442, + "learning_rate": 1.89677381060967e-05, + "loss": 0.09368896484375, + "step": 6437 + }, + { + "epoch": 0.43517642287413816, + "grad_norm": 0.21146626770496368, + "learning_rate": 1.8964556004837454e-05, + "loss": 0.040859222412109375, + "step": 6438 + }, + { + "epoch": 0.43524401784507233, + "grad_norm": 0.3198062777519226, + "learning_rate": 1.896137371174834e-05, + "loss": 0.064971923828125, + "step": 6439 + }, + { + "epoch": 0.4353116128160065, + "grad_norm": 0.7759438157081604, + "learning_rate": 1.895819122698333e-05, + "loss": 0.1464385986328125, + "step": 6440 + }, + { + "epoch": 0.43537920778694067, + "grad_norm": 0.3828255534172058, + "learning_rate": 1.895500855069641e-05, + "loss": 0.072479248046875, + "step": 6441 + }, + { + "epoch": 0.43544680275787484, + "grad_norm": 1.2284168004989624, + "learning_rate": 1.8951825683041577e-05, + "loss": 0.11821746826171875, + "step": 6442 + }, + { + "epoch": 0.43551439772880896, + "grad_norm": 0.5952811241149902, + "learning_rate": 1.8948642624172848e-05, + "loss": 0.12255096435546875, + "step": 6443 + }, + { + "epoch": 0.4355819926997431, + "grad_norm": 0.960499107837677, + "learning_rate": 1.8945459374244226e-05, + "loss": 0.14638900756835938, + "step": 6444 + }, + { + "epoch": 0.4356495876706773, + "grad_norm": 0.6090748906135559, + "learning_rate": 1.894227593340975e-05, + "loss": 0.10826873779296875, + "step": 6445 + }, + { + "epoch": 0.43571718264161147, + "grad_norm": 1.049677848815918, + "learning_rate": 1.8939092301823444e-05, + "loss": 0.15544891357421875, + "step": 6446 + }, + { + "epoch": 0.43578477761254564, + "grad_norm": 0.33203965425491333, + "learning_rate": 1.8935908479639357e-05, + "loss": 0.054901123046875, + "step": 6447 + }, + { + "epoch": 0.4358523725834798, + "grad_norm": 0.6199821829795837, + "learning_rate": 1.893272446701154e-05, + "loss": 0.10056686401367188, + "step": 6448 + }, + { + "epoch": 0.435919967554414, + "grad_norm": 0.9663643836975098, + "learning_rate": 1.8929540264094053e-05, + "loss": 0.206878662109375, + "step": 6449 + }, + { + "epoch": 0.4359875625253481, + "grad_norm": 0.46166667342185974, + "learning_rate": 1.892635587104097e-05, + "loss": 0.0780181884765625, + "step": 6450 + }, + { + "epoch": 0.43605515749628226, + "grad_norm": 0.8276761770248413, + "learning_rate": 1.8923171288006373e-05, + "loss": 0.1627655029296875, + "step": 6451 + }, + { + "epoch": 0.43612275246721643, + "grad_norm": 0.26859715580940247, + "learning_rate": 1.8919986515144358e-05, + "loss": 0.04656982421875, + "step": 6452 + }, + { + "epoch": 0.4361903474381506, + "grad_norm": 0.285308837890625, + "learning_rate": 1.891680155260901e-05, + "loss": 0.060272216796875, + "step": 6453 + }, + { + "epoch": 0.4362579424090848, + "grad_norm": 0.5780194401741028, + "learning_rate": 1.891361640055445e-05, + "loss": 0.0817108154296875, + "step": 6454 + }, + { + "epoch": 0.43632553738001895, + "grad_norm": 0.39072978496551514, + "learning_rate": 1.8910431059134784e-05, + "loss": 0.07350921630859375, + "step": 6455 + }, + { + "epoch": 0.4363931323509531, + "grad_norm": 0.2917715907096863, + "learning_rate": 1.8907245528504145e-05, + "loss": 0.05754661560058594, + "step": 6456 + }, + { + "epoch": 0.43646072732188723, + "grad_norm": 0.3799058794975281, + "learning_rate": 1.8904059808816676e-05, + "loss": 0.07361602783203125, + "step": 6457 + }, + { + "epoch": 0.4365283222928214, + "grad_norm": 0.2552148401737213, + "learning_rate": 1.8900873900226508e-05, + "loss": 0.05523681640625, + "step": 6458 + }, + { + "epoch": 0.4365959172637556, + "grad_norm": 1.3284655809402466, + "learning_rate": 1.88976878028878e-05, + "loss": 0.212799072265625, + "step": 6459 + }, + { + "epoch": 0.43666351223468974, + "grad_norm": 0.9489997625350952, + "learning_rate": 1.889450151695472e-05, + "loss": 0.22174072265625, + "step": 6460 + }, + { + "epoch": 0.4367311072056239, + "grad_norm": 0.7718937993049622, + "learning_rate": 1.8891315042581438e-05, + "loss": 0.17681884765625, + "step": 6461 + }, + { + "epoch": 0.4367987021765581, + "grad_norm": 1.2119760513305664, + "learning_rate": 1.888812837992213e-05, + "loss": 0.1283721923828125, + "step": 6462 + }, + { + "epoch": 0.4368662971474922, + "grad_norm": 0.5248210430145264, + "learning_rate": 1.888494152913099e-05, + "loss": 0.132568359375, + "step": 6463 + }, + { + "epoch": 0.43693389211842637, + "grad_norm": 0.7209523320198059, + "learning_rate": 1.8881754490362227e-05, + "loss": 0.1417236328125, + "step": 6464 + }, + { + "epoch": 0.43700148708936054, + "grad_norm": 1.0352391004562378, + "learning_rate": 1.8878567263770036e-05, + "loss": 0.1468505859375, + "step": 6465 + }, + { + "epoch": 0.4370690820602947, + "grad_norm": 0.8412242531776428, + "learning_rate": 1.887537984950864e-05, + "loss": 0.12451171875, + "step": 6466 + }, + { + "epoch": 0.4371366770312289, + "grad_norm": 0.6020086407661438, + "learning_rate": 1.8872192247732268e-05, + "loss": 0.11144256591796875, + "step": 6467 + }, + { + "epoch": 0.43720427200216305, + "grad_norm": 1.1804897785186768, + "learning_rate": 1.8869004458595153e-05, + "loss": 0.221588134765625, + "step": 6468 + }, + { + "epoch": 0.4372718669730972, + "grad_norm": 0.37339043617248535, + "learning_rate": 1.8865816482251543e-05, + "loss": 0.0904388427734375, + "step": 6469 + }, + { + "epoch": 0.43733946194403134, + "grad_norm": 0.3303956687450409, + "learning_rate": 1.8862628318855688e-05, + "loss": 0.0460968017578125, + "step": 6470 + }, + { + "epoch": 0.4374070569149655, + "grad_norm": 0.9548647999763489, + "learning_rate": 1.885943996856185e-05, + "loss": 0.16391754150390625, + "step": 6471 + }, + { + "epoch": 0.4374746518858997, + "grad_norm": 0.5221644639968872, + "learning_rate": 1.8856251431524313e-05, + "loss": 0.104095458984375, + "step": 6472 + }, + { + "epoch": 0.43754224685683385, + "grad_norm": 0.27038705348968506, + "learning_rate": 1.8853062707897344e-05, + "loss": 0.05522918701171875, + "step": 6473 + }, + { + "epoch": 0.437609841827768, + "grad_norm": 0.741753876209259, + "learning_rate": 1.8849873797835244e-05, + "loss": 0.1593780517578125, + "step": 6474 + }, + { + "epoch": 0.4376774367987022, + "grad_norm": 0.5212265253067017, + "learning_rate": 1.8846684701492304e-05, + "loss": 0.0952606201171875, + "step": 6475 + }, + { + "epoch": 0.43774503176963636, + "grad_norm": 0.16287778317928314, + "learning_rate": 1.8843495419022835e-05, + "loss": 0.03182220458984375, + "step": 6476 + }, + { + "epoch": 0.4378126267405705, + "grad_norm": 0.1724703311920166, + "learning_rate": 1.8840305950581157e-05, + "loss": 0.03627777099609375, + "step": 6477 + }, + { + "epoch": 0.43788022171150465, + "grad_norm": 0.9615545868873596, + "learning_rate": 1.8837116296321595e-05, + "loss": 0.1554718017578125, + "step": 6478 + }, + { + "epoch": 0.4379478166824388, + "grad_norm": 0.2847244143486023, + "learning_rate": 1.8833926456398483e-05, + "loss": 0.050800323486328125, + "step": 6479 + }, + { + "epoch": 0.438015411653373, + "grad_norm": 0.8494630455970764, + "learning_rate": 1.8830736430966167e-05, + "loss": 0.11971282958984375, + "step": 6480 + }, + { + "epoch": 0.43808300662430716, + "grad_norm": 0.24744582176208496, + "learning_rate": 1.8827546220179e-05, + "loss": 0.03845977783203125, + "step": 6481 + }, + { + "epoch": 0.43815060159524133, + "grad_norm": 0.24572589993476868, + "learning_rate": 1.882435582419134e-05, + "loss": 0.047176361083984375, + "step": 6482 + }, + { + "epoch": 0.4382181965661755, + "grad_norm": 0.22049367427825928, + "learning_rate": 1.8821165243157568e-05, + "loss": 0.05107879638671875, + "step": 6483 + }, + { + "epoch": 0.4382857915371096, + "grad_norm": 0.716902494430542, + "learning_rate": 1.8817974477232048e-05, + "loss": 0.142822265625, + "step": 6484 + }, + { + "epoch": 0.4383533865080438, + "grad_norm": 0.5474714040756226, + "learning_rate": 1.8814783526569187e-05, + "loss": 0.148162841796875, + "step": 6485 + }, + { + "epoch": 0.43842098147897796, + "grad_norm": 0.18086716532707214, + "learning_rate": 1.8811592391323373e-05, + "loss": 0.03546714782714844, + "step": 6486 + }, + { + "epoch": 0.4384885764499121, + "grad_norm": 0.861315131187439, + "learning_rate": 1.8808401071649017e-05, + "loss": 0.12762832641601562, + "step": 6487 + }, + { + "epoch": 0.4385561714208463, + "grad_norm": 0.32860758900642395, + "learning_rate": 1.8805209567700528e-05, + "loss": 0.07437896728515625, + "step": 6488 + }, + { + "epoch": 0.43862376639178047, + "grad_norm": 1.037284016609192, + "learning_rate": 1.880201787963234e-05, + "loss": 0.161865234375, + "step": 6489 + }, + { + "epoch": 0.43869136136271464, + "grad_norm": 0.31630608439445496, + "learning_rate": 1.8798826007598883e-05, + "loss": 0.0559234619140625, + "step": 6490 + }, + { + "epoch": 0.43875895633364875, + "grad_norm": 0.28952082991600037, + "learning_rate": 1.8795633951754596e-05, + "loss": 0.0582122802734375, + "step": 6491 + }, + { + "epoch": 0.4388265513045829, + "grad_norm": 0.632568895816803, + "learning_rate": 1.8792441712253937e-05, + "loss": 0.13365554809570312, + "step": 6492 + }, + { + "epoch": 0.4388941462755171, + "grad_norm": 1.1181210279464722, + "learning_rate": 1.8789249289251357e-05, + "loss": 0.2445068359375, + "step": 6493 + }, + { + "epoch": 0.43896174124645126, + "grad_norm": 0.4581887722015381, + "learning_rate": 1.878605668290134e-05, + "loss": 0.07476806640625, + "step": 6494 + }, + { + "epoch": 0.43902933621738544, + "grad_norm": 0.45791926980018616, + "learning_rate": 1.878286389335835e-05, + "loss": 0.09928131103515625, + "step": 6495 + }, + { + "epoch": 0.4390969311883196, + "grad_norm": 1.1809430122375488, + "learning_rate": 1.877967092077688e-05, + "loss": 0.170684814453125, + "step": 6496 + }, + { + "epoch": 0.4391645261592538, + "grad_norm": 0.4387259781360626, + "learning_rate": 1.8776477765311425e-05, + "loss": 0.0809173583984375, + "step": 6497 + }, + { + "epoch": 0.4392321211301879, + "grad_norm": 0.35705071687698364, + "learning_rate": 1.877328442711649e-05, + "loss": 0.04421234130859375, + "step": 6498 + }, + { + "epoch": 0.43929971610112206, + "grad_norm": 0.4687510132789612, + "learning_rate": 1.877009090634659e-05, + "loss": 0.1044769287109375, + "step": 6499 + }, + { + "epoch": 0.43936731107205623, + "grad_norm": 0.7756433486938477, + "learning_rate": 1.8766897203156245e-05, + "loss": 0.1220550537109375, + "step": 6500 + }, + { + "epoch": 0.4394349060429904, + "grad_norm": 0.49337124824523926, + "learning_rate": 1.8763703317699984e-05, + "loss": 0.1040496826171875, + "step": 6501 + }, + { + "epoch": 0.4395025010139246, + "grad_norm": 0.5548787117004395, + "learning_rate": 1.8760509250132355e-05, + "loss": 0.08117103576660156, + "step": 6502 + }, + { + "epoch": 0.43957009598485874, + "grad_norm": 0.25143963098526, + "learning_rate": 1.8757315000607904e-05, + "loss": 0.03096771240234375, + "step": 6503 + }, + { + "epoch": 0.4396376909557929, + "grad_norm": 0.24820727109909058, + "learning_rate": 1.8754120569281183e-05, + "loss": 0.060955047607421875, + "step": 6504 + }, + { + "epoch": 0.43970528592672703, + "grad_norm": 0.5465244650840759, + "learning_rate": 1.8750925956306762e-05, + "loss": 0.0899810791015625, + "step": 6505 + }, + { + "epoch": 0.4397728808976612, + "grad_norm": 0.5169799327850342, + "learning_rate": 1.8747731161839222e-05, + "loss": 0.1407623291015625, + "step": 6506 + }, + { + "epoch": 0.43984047586859537, + "grad_norm": 0.5259788632392883, + "learning_rate": 1.8744536186033133e-05, + "loss": 0.10369873046875, + "step": 6507 + }, + { + "epoch": 0.43990807083952954, + "grad_norm": 0.7205077409744263, + "learning_rate": 1.8741341029043105e-05, + "loss": 0.12225341796875, + "step": 6508 + }, + { + "epoch": 0.4399756658104637, + "grad_norm": 0.4559254050254822, + "learning_rate": 1.873814569102372e-05, + "loss": 0.07131195068359375, + "step": 6509 + }, + { + "epoch": 0.4400432607813979, + "grad_norm": 0.5252221822738647, + "learning_rate": 1.8734950172129606e-05, + "loss": 0.1194000244140625, + "step": 6510 + }, + { + "epoch": 0.44011085575233205, + "grad_norm": 0.32107627391815186, + "learning_rate": 1.8731754472515375e-05, + "loss": 0.066680908203125, + "step": 6511 + }, + { + "epoch": 0.44017845072326617, + "grad_norm": 0.62278151512146, + "learning_rate": 1.8728558592335657e-05, + "loss": 0.1126708984375, + "step": 6512 + }, + { + "epoch": 0.44024604569420034, + "grad_norm": 0.3576337993144989, + "learning_rate": 1.8725362531745083e-05, + "loss": 0.0815582275390625, + "step": 6513 + }, + { + "epoch": 0.4403136406651345, + "grad_norm": 0.7848999500274658, + "learning_rate": 1.8722166290898303e-05, + "loss": 0.1774444580078125, + "step": 6514 + }, + { + "epoch": 0.4403812356360687, + "grad_norm": 0.5330049395561218, + "learning_rate": 1.8718969869949973e-05, + "loss": 0.11298370361328125, + "step": 6515 + }, + { + "epoch": 0.44044883060700285, + "grad_norm": 0.41615408658981323, + "learning_rate": 1.8715773269054748e-05, + "loss": 0.0712127685546875, + "step": 6516 + }, + { + "epoch": 0.440516425577937, + "grad_norm": 1.4653778076171875, + "learning_rate": 1.8712576488367304e-05, + "loss": 0.20323944091796875, + "step": 6517 + }, + { + "epoch": 0.4405840205488712, + "grad_norm": 0.28692588210105896, + "learning_rate": 1.8709379528042325e-05, + "loss": 0.05548095703125, + "step": 6518 + }, + { + "epoch": 0.4406516155198053, + "grad_norm": 0.8493531346321106, + "learning_rate": 1.8706182388234493e-05, + "loss": 0.174652099609375, + "step": 6519 + }, + { + "epoch": 0.4407192104907395, + "grad_norm": 0.5991470217704773, + "learning_rate": 1.870298506909851e-05, + "loss": 0.1124267578125, + "step": 6520 + }, + { + "epoch": 0.44078680546167365, + "grad_norm": 1.4038562774658203, + "learning_rate": 1.8699787570789084e-05, + "loss": 0.2036285400390625, + "step": 6521 + }, + { + "epoch": 0.4408544004326078, + "grad_norm": 0.5408946871757507, + "learning_rate": 1.8696589893460917e-05, + "loss": 0.10143280029296875, + "step": 6522 + }, + { + "epoch": 0.440921995403542, + "grad_norm": 0.6837210655212402, + "learning_rate": 1.8693392037268753e-05, + "loss": 0.13886260986328125, + "step": 6523 + }, + { + "epoch": 0.44098959037447616, + "grad_norm": 0.2788340449333191, + "learning_rate": 1.8690194002367308e-05, + "loss": 0.050018310546875, + "step": 6524 + }, + { + "epoch": 0.4410571853454103, + "grad_norm": 0.3030955195426941, + "learning_rate": 1.8686995788911327e-05, + "loss": 0.0389251708984375, + "step": 6525 + }, + { + "epoch": 0.44112478031634444, + "grad_norm": 0.8544759750366211, + "learning_rate": 1.8683797397055558e-05, + "loss": 0.179779052734375, + "step": 6526 + }, + { + "epoch": 0.4411923752872786, + "grad_norm": 0.8227709531784058, + "learning_rate": 1.868059882695477e-05, + "loss": 0.18096923828125, + "step": 6527 + }, + { + "epoch": 0.4412599702582128, + "grad_norm": 0.4170165956020355, + "learning_rate": 1.867740007876372e-05, + "loss": 0.07522201538085938, + "step": 6528 + }, + { + "epoch": 0.44132756522914696, + "grad_norm": 1.3341330289840698, + "learning_rate": 1.867420115263718e-05, + "loss": 0.205841064453125, + "step": 6529 + }, + { + "epoch": 0.4413951602000811, + "grad_norm": 1.8685120344161987, + "learning_rate": 1.8671002048729938e-05, + "loss": 0.257476806640625, + "step": 6530 + }, + { + "epoch": 0.4414627551710153, + "grad_norm": 0.22882740199565887, + "learning_rate": 1.8667802767196792e-05, + "loss": 0.03773689270019531, + "step": 6531 + }, + { + "epoch": 0.4415303501419494, + "grad_norm": 0.4906623959541321, + "learning_rate": 1.8664603308192537e-05, + "loss": 0.10077667236328125, + "step": 6532 + }, + { + "epoch": 0.4415979451128836, + "grad_norm": 0.4504089951515198, + "learning_rate": 1.8661403671871984e-05, + "loss": 0.1300201416015625, + "step": 6533 + }, + { + "epoch": 0.44166554008381775, + "grad_norm": 0.5316611528396606, + "learning_rate": 1.8658203858389947e-05, + "loss": 0.08963775634765625, + "step": 6534 + }, + { + "epoch": 0.4417331350547519, + "grad_norm": 0.3124646842479706, + "learning_rate": 1.865500386790126e-05, + "loss": 0.0542449951171875, + "step": 6535 + }, + { + "epoch": 0.4418007300256861, + "grad_norm": 0.5315858721733093, + "learning_rate": 1.865180370056076e-05, + "loss": 0.1232452392578125, + "step": 6536 + }, + { + "epoch": 0.44186832499662027, + "grad_norm": 1.0716536045074463, + "learning_rate": 1.864860335652328e-05, + "loss": 0.1845855712890625, + "step": 6537 + }, + { + "epoch": 0.44193591996755444, + "grad_norm": 0.14096775650978088, + "learning_rate": 1.864540283594368e-05, + "loss": 0.027337074279785156, + "step": 6538 + }, + { + "epoch": 0.44200351493848855, + "grad_norm": 0.39098113775253296, + "learning_rate": 1.8642202138976827e-05, + "loss": 0.08774566650390625, + "step": 6539 + }, + { + "epoch": 0.4420711099094227, + "grad_norm": 0.44875743985176086, + "learning_rate": 1.863900126577758e-05, + "loss": 0.085906982421875, + "step": 6540 + }, + { + "epoch": 0.4421387048803569, + "grad_norm": 0.8877440690994263, + "learning_rate": 1.863580021650082e-05, + "loss": 0.1441192626953125, + "step": 6541 + }, + { + "epoch": 0.44220629985129106, + "grad_norm": 0.3381885588169098, + "learning_rate": 1.8632598991301428e-05, + "loss": 0.05401611328125, + "step": 6542 + }, + { + "epoch": 0.44227389482222523, + "grad_norm": 0.2553752660751343, + "learning_rate": 1.8629397590334317e-05, + "loss": 0.06439208984375, + "step": 6543 + }, + { + "epoch": 0.4423414897931594, + "grad_norm": 0.7381729483604431, + "learning_rate": 1.8626196013754375e-05, + "loss": 0.14263916015625, + "step": 6544 + }, + { + "epoch": 0.4424090847640936, + "grad_norm": 0.2007269561290741, + "learning_rate": 1.862299426171652e-05, + "loss": 0.04880523681640625, + "step": 6545 + }, + { + "epoch": 0.4424766797350277, + "grad_norm": 0.9764930009841919, + "learning_rate": 1.861979233437567e-05, + "loss": 0.167205810546875, + "step": 6546 + }, + { + "epoch": 0.44254427470596186, + "grad_norm": 1.7075860500335693, + "learning_rate": 1.8616590231886755e-05, + "loss": 0.25921630859375, + "step": 6547 + }, + { + "epoch": 0.44261186967689603, + "grad_norm": 0.18035127222537994, + "learning_rate": 1.8613387954404712e-05, + "loss": 0.039173126220703125, + "step": 6548 + }, + { + "epoch": 0.4426794646478302, + "grad_norm": 1.2787714004516602, + "learning_rate": 1.861018550208449e-05, + "loss": 0.22296142578125, + "step": 6549 + }, + { + "epoch": 0.44274705961876437, + "grad_norm": 0.3821077346801758, + "learning_rate": 1.8606982875081048e-05, + "loss": 0.07244873046875, + "step": 6550 + }, + { + "epoch": 0.44281465458969854, + "grad_norm": 0.9793640375137329, + "learning_rate": 1.8603780073549336e-05, + "loss": 0.12868690490722656, + "step": 6551 + }, + { + "epoch": 0.4428822495606327, + "grad_norm": 0.20800837874412537, + "learning_rate": 1.8600577097644335e-05, + "loss": 0.025920867919921875, + "step": 6552 + }, + { + "epoch": 0.4429498445315668, + "grad_norm": 1.116837739944458, + "learning_rate": 1.859737394752102e-05, + "loss": 0.2420654296875, + "step": 6553 + }, + { + "epoch": 0.443017439502501, + "grad_norm": 0.5384976267814636, + "learning_rate": 1.8594170623334385e-05, + "loss": 0.1427154541015625, + "step": 6554 + }, + { + "epoch": 0.44308503447343517, + "grad_norm": 0.5667754411697388, + "learning_rate": 1.859096712523942e-05, + "loss": 0.09479522705078125, + "step": 6555 + }, + { + "epoch": 0.44315262944436934, + "grad_norm": 0.23317484557628632, + "learning_rate": 1.8587763453391133e-05, + "loss": 0.041515350341796875, + "step": 6556 + }, + { + "epoch": 0.4432202244153035, + "grad_norm": 0.4404408633708954, + "learning_rate": 1.858455960794454e-05, + "loss": 0.154296875, + "step": 6557 + }, + { + "epoch": 0.4432878193862377, + "grad_norm": 0.16970273852348328, + "learning_rate": 1.8581355589054665e-05, + "loss": 0.02466297149658203, + "step": 6558 + }, + { + "epoch": 0.44335541435717185, + "grad_norm": 0.8965474963188171, + "learning_rate": 1.857815139687653e-05, + "loss": 0.127655029296875, + "step": 6559 + }, + { + "epoch": 0.44342300932810597, + "grad_norm": 0.27202728390693665, + "learning_rate": 1.8574947031565173e-05, + "loss": 0.073944091796875, + "step": 6560 + }, + { + "epoch": 0.44349060429904014, + "grad_norm": 0.7450730800628662, + "learning_rate": 1.8571742493275653e-05, + "loss": 0.13299179077148438, + "step": 6561 + }, + { + "epoch": 0.4435581992699743, + "grad_norm": 0.43987882137298584, + "learning_rate": 1.8568537782163022e-05, + "loss": 0.091796875, + "step": 6562 + }, + { + "epoch": 0.4436257942409085, + "grad_norm": 0.8438189625740051, + "learning_rate": 1.8565332898382335e-05, + "loss": 0.15924072265625, + "step": 6563 + }, + { + "epoch": 0.44369338921184265, + "grad_norm": 1.1319973468780518, + "learning_rate": 1.856212784208867e-05, + "loss": 0.20318603515625, + "step": 6564 + }, + { + "epoch": 0.4437609841827768, + "grad_norm": 0.9565728306770325, + "learning_rate": 1.855892261343711e-05, + "loss": 0.16638946533203125, + "step": 6565 + }, + { + "epoch": 0.443828579153711, + "grad_norm": 0.41122153401374817, + "learning_rate": 1.8555717212582738e-05, + "loss": 0.085296630859375, + "step": 6566 + }, + { + "epoch": 0.4438961741246451, + "grad_norm": 0.5927674770355225, + "learning_rate": 1.855251163968066e-05, + "loss": 0.12808990478515625, + "step": 6567 + }, + { + "epoch": 0.4439637690955793, + "grad_norm": 0.23698899149894714, + "learning_rate": 1.8549305894885968e-05, + "loss": 0.0379180908203125, + "step": 6568 + }, + { + "epoch": 0.44403136406651345, + "grad_norm": 0.6501039862632751, + "learning_rate": 1.8546099978353787e-05, + "loss": 0.1322021484375, + "step": 6569 + }, + { + "epoch": 0.4440989590374476, + "grad_norm": 0.7221131920814514, + "learning_rate": 1.854289389023924e-05, + "loss": 0.121734619140625, + "step": 6570 + }, + { + "epoch": 0.4441665540083818, + "grad_norm": 1.2922883033752441, + "learning_rate": 1.853968763069745e-05, + "loss": 0.216064453125, + "step": 6571 + }, + { + "epoch": 0.44423414897931596, + "grad_norm": 0.4569682776927948, + "learning_rate": 1.8536481199883552e-05, + "loss": 0.10291290283203125, + "step": 6572 + }, + { + "epoch": 0.4443017439502501, + "grad_norm": 0.3457741439342499, + "learning_rate": 1.8533274597952714e-05, + "loss": 0.06768035888671875, + "step": 6573 + }, + { + "epoch": 0.44436933892118424, + "grad_norm": 0.32256460189819336, + "learning_rate": 1.8530067825060073e-05, + "loss": 0.05191802978515625, + "step": 6574 + }, + { + "epoch": 0.4444369338921184, + "grad_norm": 1.0382822751998901, + "learning_rate": 1.8526860881360795e-05, + "loss": 0.11455154418945312, + "step": 6575 + }, + { + "epoch": 0.4445045288630526, + "grad_norm": 0.9006142020225525, + "learning_rate": 1.8523653767010053e-05, + "loss": 0.14812850952148438, + "step": 6576 + }, + { + "epoch": 0.44457212383398675, + "grad_norm": 1.5027028322219849, + "learning_rate": 1.8520446482163035e-05, + "loss": 0.2344970703125, + "step": 6577 + }, + { + "epoch": 0.4446397188049209, + "grad_norm": 0.874775767326355, + "learning_rate": 1.851723902697492e-05, + "loss": 0.22991943359375, + "step": 6578 + }, + { + "epoch": 0.4447073137758551, + "grad_norm": 1.1024365425109863, + "learning_rate": 1.8514031401600906e-05, + "loss": 0.129425048828125, + "step": 6579 + }, + { + "epoch": 0.44477490874678927, + "grad_norm": 0.6497998237609863, + "learning_rate": 1.8510823606196197e-05, + "loss": 0.14479827880859375, + "step": 6580 + }, + { + "epoch": 0.4448425037177234, + "grad_norm": 1.391198754310608, + "learning_rate": 1.850761564091601e-05, + "loss": 0.1812744140625, + "step": 6581 + }, + { + "epoch": 0.44491009868865755, + "grad_norm": 1.187965989112854, + "learning_rate": 1.8504407505915565e-05, + "loss": 0.18187713623046875, + "step": 6582 + }, + { + "epoch": 0.4449776936595917, + "grad_norm": 1.2922112941741943, + "learning_rate": 1.850119920135009e-05, + "loss": 0.17498779296875, + "step": 6583 + }, + { + "epoch": 0.4450452886305259, + "grad_norm": 1.3817425966262817, + "learning_rate": 1.849799072737483e-05, + "loss": 0.13661575317382812, + "step": 6584 + }, + { + "epoch": 0.44511288360146006, + "grad_norm": 0.2669103145599365, + "learning_rate": 1.849478208414502e-05, + "loss": 0.0454864501953125, + "step": 6585 + }, + { + "epoch": 0.44518047857239423, + "grad_norm": 0.5205411314964294, + "learning_rate": 1.849157327181592e-05, + "loss": 0.10268402099609375, + "step": 6586 + }, + { + "epoch": 0.44524807354332835, + "grad_norm": 0.8592303395271301, + "learning_rate": 1.848836429054279e-05, + "loss": 0.1606292724609375, + "step": 6587 + }, + { + "epoch": 0.4453156685142625, + "grad_norm": 0.6960332989692688, + "learning_rate": 1.8485155140480903e-05, + "loss": 0.09418106079101562, + "step": 6588 + }, + { + "epoch": 0.4453832634851967, + "grad_norm": 1.0604945421218872, + "learning_rate": 1.8481945821785534e-05, + "loss": 0.1727294921875, + "step": 6589 + }, + { + "epoch": 0.44545085845613086, + "grad_norm": 0.60337233543396, + "learning_rate": 1.8478736334611975e-05, + "loss": 0.1498260498046875, + "step": 6590 + }, + { + "epoch": 0.44551845342706503, + "grad_norm": 0.2634490132331848, + "learning_rate": 1.8475526679115517e-05, + "loss": 0.05101776123046875, + "step": 6591 + }, + { + "epoch": 0.4455860483979992, + "grad_norm": 1.4492660760879517, + "learning_rate": 1.8472316855451465e-05, + "loss": 0.213165283203125, + "step": 6592 + }, + { + "epoch": 0.44565364336893337, + "grad_norm": 1.0731638669967651, + "learning_rate": 1.8469106863775128e-05, + "loss": 0.1482086181640625, + "step": 6593 + }, + { + "epoch": 0.4457212383398675, + "grad_norm": 0.4448750615119934, + "learning_rate": 1.8465896704241825e-05, + "loss": 0.06272506713867188, + "step": 6594 + }, + { + "epoch": 0.44578883331080166, + "grad_norm": 0.3705688714981079, + "learning_rate": 1.846268637700689e-05, + "loss": 0.05818939208984375, + "step": 6595 + }, + { + "epoch": 0.44585642828173583, + "grad_norm": 0.8871508836746216, + "learning_rate": 1.8459475882225652e-05, + "loss": 0.12813568115234375, + "step": 6596 + }, + { + "epoch": 0.44592402325267, + "grad_norm": 0.3757362961769104, + "learning_rate": 1.8456265220053457e-05, + "loss": 0.055095672607421875, + "step": 6597 + }, + { + "epoch": 0.44599161822360417, + "grad_norm": 0.45795559883117676, + "learning_rate": 1.8453054390645655e-05, + "loss": 0.04592132568359375, + "step": 6598 + }, + { + "epoch": 0.44605921319453834, + "grad_norm": 0.4944939613342285, + "learning_rate": 1.8449843394157605e-05, + "loss": 0.11052703857421875, + "step": 6599 + }, + { + "epoch": 0.4461268081654725, + "grad_norm": 0.4839058816432953, + "learning_rate": 1.8446632230744685e-05, + "loss": 0.07855224609375, + "step": 6600 + }, + { + "epoch": 0.4461944031364066, + "grad_norm": 0.7727416157722473, + "learning_rate": 1.8443420900562253e-05, + "loss": 0.12728118896484375, + "step": 6601 + }, + { + "epoch": 0.4462619981073408, + "grad_norm": 0.9695771932601929, + "learning_rate": 1.844020940376571e-05, + "loss": 0.12886810302734375, + "step": 6602 + }, + { + "epoch": 0.44632959307827497, + "grad_norm": 0.19004058837890625, + "learning_rate": 1.8436997740510438e-05, + "loss": 0.02492523193359375, + "step": 6603 + }, + { + "epoch": 0.44639718804920914, + "grad_norm": 0.5443059802055359, + "learning_rate": 1.8433785910951844e-05, + "loss": 0.0985565185546875, + "step": 6604 + }, + { + "epoch": 0.4464647830201433, + "grad_norm": 0.6961507797241211, + "learning_rate": 1.8430573915245333e-05, + "loss": 0.120513916015625, + "step": 6605 + }, + { + "epoch": 0.4465323779910775, + "grad_norm": 0.6962429881095886, + "learning_rate": 1.842736175354632e-05, + "loss": 0.14960479736328125, + "step": 6606 + }, + { + "epoch": 0.44659997296201165, + "grad_norm": 0.48890236020088196, + "learning_rate": 1.8424149426010233e-05, + "loss": 0.10286712646484375, + "step": 6607 + }, + { + "epoch": 0.44666756793294576, + "grad_norm": 0.7314695715904236, + "learning_rate": 1.8420936932792502e-05, + "loss": 0.1532440185546875, + "step": 6608 + }, + { + "epoch": 0.44673516290387993, + "grad_norm": 0.5460810661315918, + "learning_rate": 1.8417724274048566e-05, + "loss": 0.12496185302734375, + "step": 6609 + }, + { + "epoch": 0.4468027578748141, + "grad_norm": 0.41268110275268555, + "learning_rate": 1.8414511449933874e-05, + "loss": 0.06119537353515625, + "step": 6610 + }, + { + "epoch": 0.4468703528457483, + "grad_norm": 0.566396176815033, + "learning_rate": 1.8411298460603885e-05, + "loss": 0.12100982666015625, + "step": 6611 + }, + { + "epoch": 0.44693794781668245, + "grad_norm": 0.5020471811294556, + "learning_rate": 1.8408085306214065e-05, + "loss": 0.06863784790039062, + "step": 6612 + }, + { + "epoch": 0.4470055427876166, + "grad_norm": 0.6194890141487122, + "learning_rate": 1.8404871986919877e-05, + "loss": 0.1228790283203125, + "step": 6613 + }, + { + "epoch": 0.4470731377585508, + "grad_norm": 1.2142772674560547, + "learning_rate": 1.840165850287681e-05, + "loss": 0.246917724609375, + "step": 6614 + }, + { + "epoch": 0.4471407327294849, + "grad_norm": 0.4834938049316406, + "learning_rate": 1.839844485424035e-05, + "loss": 0.06835174560546875, + "step": 6615 + }, + { + "epoch": 0.4472083277004191, + "grad_norm": 1.0128356218338013, + "learning_rate": 1.8395231041165996e-05, + "loss": 0.210113525390625, + "step": 6616 + }, + { + "epoch": 0.44727592267135324, + "grad_norm": 1.0339998006820679, + "learning_rate": 1.8392017063809244e-05, + "loss": 0.1458740234375, + "step": 6617 + }, + { + "epoch": 0.4473435176422874, + "grad_norm": 0.3604147136211395, + "learning_rate": 1.8388802922325612e-05, + "loss": 0.0805206298828125, + "step": 6618 + }, + { + "epoch": 0.4474111126132216, + "grad_norm": 0.35214459896087646, + "learning_rate": 1.8385588616870625e-05, + "loss": 0.0891571044921875, + "step": 6619 + }, + { + "epoch": 0.44747870758415575, + "grad_norm": 0.8250811696052551, + "learning_rate": 1.838237414759981e-05, + "loss": 0.13130950927734375, + "step": 6620 + }, + { + "epoch": 0.4475463025550899, + "grad_norm": 0.8707497715950012, + "learning_rate": 1.837915951466869e-05, + "loss": 0.192779541015625, + "step": 6621 + }, + { + "epoch": 0.44761389752602404, + "grad_norm": 0.7477881908416748, + "learning_rate": 1.837594471823282e-05, + "loss": 0.188812255859375, + "step": 6622 + }, + { + "epoch": 0.4476814924969582, + "grad_norm": 0.8156212568283081, + "learning_rate": 1.8372729758447752e-05, + "loss": 0.1549530029296875, + "step": 6623 + }, + { + "epoch": 0.4477490874678924, + "grad_norm": 0.1502053439617157, + "learning_rate": 1.8369514635469043e-05, + "loss": 0.026216506958007812, + "step": 6624 + }, + { + "epoch": 0.44781668243882655, + "grad_norm": 1.3838698863983154, + "learning_rate": 1.836629934945226e-05, + "loss": 0.252044677734375, + "step": 6625 + }, + { + "epoch": 0.4478842774097607, + "grad_norm": 0.38383156061172485, + "learning_rate": 1.8363083900552975e-05, + "loss": 0.08863067626953125, + "step": 6626 + }, + { + "epoch": 0.4479518723806949, + "grad_norm": 0.3930903971195221, + "learning_rate": 1.8359868288926783e-05, + "loss": 0.0992431640625, + "step": 6627 + }, + { + "epoch": 0.44801946735162906, + "grad_norm": 0.28284311294555664, + "learning_rate": 1.8356652514729266e-05, + "loss": 0.040920257568359375, + "step": 6628 + }, + { + "epoch": 0.4480870623225632, + "grad_norm": 0.448064386844635, + "learning_rate": 1.8353436578116027e-05, + "loss": 0.08272552490234375, + "step": 6629 + }, + { + "epoch": 0.44815465729349735, + "grad_norm": 1.1324158906936646, + "learning_rate": 1.835022047924267e-05, + "loss": 0.228851318359375, + "step": 6630 + }, + { + "epoch": 0.4482222522644315, + "grad_norm": 0.5548620820045471, + "learning_rate": 1.834700421826481e-05, + "loss": 0.09984588623046875, + "step": 6631 + }, + { + "epoch": 0.4482898472353657, + "grad_norm": 0.37516751885414124, + "learning_rate": 1.8343787795338076e-05, + "loss": 0.07184219360351562, + "step": 6632 + }, + { + "epoch": 0.44835744220629986, + "grad_norm": 0.5127655267715454, + "learning_rate": 1.8340571210618092e-05, + "loss": 0.1010284423828125, + "step": 6633 + }, + { + "epoch": 0.44842503717723403, + "grad_norm": 0.3444282114505768, + "learning_rate": 1.8337354464260492e-05, + "loss": 0.042148590087890625, + "step": 6634 + }, + { + "epoch": 0.4484926321481682, + "grad_norm": 0.24683664739131927, + "learning_rate": 1.8334137556420933e-05, + "loss": 0.052337646484375, + "step": 6635 + }, + { + "epoch": 0.4485602271191023, + "grad_norm": 0.40934789180755615, + "learning_rate": 1.8330920487255065e-05, + "loss": 0.0952301025390625, + "step": 6636 + }, + { + "epoch": 0.4486278220900365, + "grad_norm": 0.25112465023994446, + "learning_rate": 1.832770325691855e-05, + "loss": 0.045196533203125, + "step": 6637 + }, + { + "epoch": 0.44869541706097066, + "grad_norm": 1.1431992053985596, + "learning_rate": 1.8324485865567054e-05, + "loss": 0.17449951171875, + "step": 6638 + }, + { + "epoch": 0.44876301203190483, + "grad_norm": 0.5235222578048706, + "learning_rate": 1.8321268313356257e-05, + "loss": 0.12214279174804688, + "step": 6639 + }, + { + "epoch": 0.448830607002839, + "grad_norm": 0.4253050684928894, + "learning_rate": 1.8318050600441846e-05, + "loss": 0.056652069091796875, + "step": 6640 + }, + { + "epoch": 0.44889820197377317, + "grad_norm": 0.22097133100032806, + "learning_rate": 1.831483272697951e-05, + "loss": 0.0421295166015625, + "step": 6641 + }, + { + "epoch": 0.44896579694470734, + "grad_norm": 0.19252589344978333, + "learning_rate": 1.8311614693124958e-05, + "loss": 0.0401153564453125, + "step": 6642 + }, + { + "epoch": 0.44903339191564146, + "grad_norm": 0.348945289850235, + "learning_rate": 1.830839649903388e-05, + "loss": 0.057086944580078125, + "step": 6643 + }, + { + "epoch": 0.4491009868865756, + "grad_norm": 0.41079169511795044, + "learning_rate": 1.830517814486202e-05, + "loss": 0.0738525390625, + "step": 6644 + }, + { + "epoch": 0.4491685818575098, + "grad_norm": 1.073065996170044, + "learning_rate": 1.830195963076508e-05, + "loss": 0.1666259765625, + "step": 6645 + }, + { + "epoch": 0.44923617682844397, + "grad_norm": 0.46506059169769287, + "learning_rate": 1.8298740956898793e-05, + "loss": 0.087249755859375, + "step": 6646 + }, + { + "epoch": 0.44930377179937814, + "grad_norm": 0.2720501720905304, + "learning_rate": 1.829552212341891e-05, + "loss": 0.05576324462890625, + "step": 6647 + }, + { + "epoch": 0.4493713667703123, + "grad_norm": 0.6120014786720276, + "learning_rate": 1.8292303130481166e-05, + "loss": 0.1094207763671875, + "step": 6648 + }, + { + "epoch": 0.4494389617412465, + "grad_norm": 0.5151330232620239, + "learning_rate": 1.828908397824133e-05, + "loss": 0.0645599365234375, + "step": 6649 + }, + { + "epoch": 0.4495065567121806, + "grad_norm": 0.6082177758216858, + "learning_rate": 1.8285864666855154e-05, + "loss": 0.158233642578125, + "step": 6650 + }, + { + "epoch": 0.44957415168311476, + "grad_norm": 0.3733943998813629, + "learning_rate": 1.828264519647841e-05, + "loss": 0.06716537475585938, + "step": 6651 + }, + { + "epoch": 0.44964174665404893, + "grad_norm": 1.0427415370941162, + "learning_rate": 1.827942556726687e-05, + "loss": 0.1692352294921875, + "step": 6652 + }, + { + "epoch": 0.4497093416249831, + "grad_norm": 0.8368749618530273, + "learning_rate": 1.827620577937634e-05, + "loss": 0.12811279296875, + "step": 6653 + }, + { + "epoch": 0.4497769365959173, + "grad_norm": 0.26972994208335876, + "learning_rate": 1.827298583296259e-05, + "loss": 0.05559539794921875, + "step": 6654 + }, + { + "epoch": 0.44984453156685145, + "grad_norm": 0.5456302165985107, + "learning_rate": 1.8269765728181435e-05, + "loss": 0.11809539794921875, + "step": 6655 + }, + { + "epoch": 0.44991212653778556, + "grad_norm": 1.4279886484146118, + "learning_rate": 1.8266545465188677e-05, + "loss": 0.204681396484375, + "step": 6656 + }, + { + "epoch": 0.44997972150871973, + "grad_norm": 0.911574125289917, + "learning_rate": 1.8263325044140137e-05, + "loss": 0.15018463134765625, + "step": 6657 + }, + { + "epoch": 0.4500473164796539, + "grad_norm": 0.5895898342132568, + "learning_rate": 1.826010446519164e-05, + "loss": 0.1175689697265625, + "step": 6658 + }, + { + "epoch": 0.4501149114505881, + "grad_norm": 0.708814799785614, + "learning_rate": 1.8256883728499012e-05, + "loss": 0.1321258544921875, + "step": 6659 + }, + { + "epoch": 0.45018250642152224, + "grad_norm": 1.3201189041137695, + "learning_rate": 1.8253662834218088e-05, + "loss": 0.226654052734375, + "step": 6660 + }, + { + "epoch": 0.4502501013924564, + "grad_norm": 0.9240080714225769, + "learning_rate": 1.8250441782504727e-05, + "loss": 0.170196533203125, + "step": 6661 + }, + { + "epoch": 0.4503176963633906, + "grad_norm": 0.5719757676124573, + "learning_rate": 1.8247220573514782e-05, + "loss": 0.131561279296875, + "step": 6662 + }, + { + "epoch": 0.4503852913343247, + "grad_norm": 0.856558084487915, + "learning_rate": 1.824399920740411e-05, + "loss": 0.13396072387695312, + "step": 6663 + }, + { + "epoch": 0.45045288630525887, + "grad_norm": 0.7969547510147095, + "learning_rate": 1.8240777684328577e-05, + "loss": 0.12481689453125, + "step": 6664 + }, + { + "epoch": 0.45052048127619304, + "grad_norm": 0.39516735076904297, + "learning_rate": 1.823755600444407e-05, + "loss": 0.072601318359375, + "step": 6665 + }, + { + "epoch": 0.4505880762471272, + "grad_norm": 0.6656315326690674, + "learning_rate": 1.8234334167906466e-05, + "loss": 0.1266937255859375, + "step": 6666 + }, + { + "epoch": 0.4506556712180614, + "grad_norm": 0.6277667284011841, + "learning_rate": 1.8231112174871668e-05, + "loss": 0.14267730712890625, + "step": 6667 + }, + { + "epoch": 0.45072326618899555, + "grad_norm": 0.4421542286872864, + "learning_rate": 1.822789002549556e-05, + "loss": 0.1212005615234375, + "step": 6668 + }, + { + "epoch": 0.4507908611599297, + "grad_norm": 0.7682211995124817, + "learning_rate": 1.8224667719934063e-05, + "loss": 0.210113525390625, + "step": 6669 + }, + { + "epoch": 0.45085845613086384, + "grad_norm": 0.4114672839641571, + "learning_rate": 1.822144525834309e-05, + "loss": 0.1055450439453125, + "step": 6670 + }, + { + "epoch": 0.450926051101798, + "grad_norm": 0.5264106392860413, + "learning_rate": 1.821822264087856e-05, + "loss": 0.11077880859375, + "step": 6671 + }, + { + "epoch": 0.4509936460727322, + "grad_norm": 0.18873853981494904, + "learning_rate": 1.82149998676964e-05, + "loss": 0.03277587890625, + "step": 6672 + }, + { + "epoch": 0.45106124104366635, + "grad_norm": 0.33223220705986023, + "learning_rate": 1.8211776938952558e-05, + "loss": 0.08762359619140625, + "step": 6673 + }, + { + "epoch": 0.4511288360146005, + "grad_norm": 0.6745707392692566, + "learning_rate": 1.8208553854802972e-05, + "loss": 0.1053619384765625, + "step": 6674 + }, + { + "epoch": 0.4511964309855347, + "grad_norm": 1.4288853406906128, + "learning_rate": 1.8205330615403594e-05, + "loss": 0.21490478515625, + "step": 6675 + }, + { + "epoch": 0.45126402595646886, + "grad_norm": 0.21616590023040771, + "learning_rate": 1.8202107220910393e-05, + "loss": 0.039459228515625, + "step": 6676 + }, + { + "epoch": 0.451331620927403, + "grad_norm": 0.4971674382686615, + "learning_rate": 1.8198883671479324e-05, + "loss": 0.10443115234375, + "step": 6677 + }, + { + "epoch": 0.45139921589833715, + "grad_norm": 0.6510540843009949, + "learning_rate": 1.819565996726637e-05, + "loss": 0.1809844970703125, + "step": 6678 + }, + { + "epoch": 0.4514668108692713, + "grad_norm": 0.5940566062927246, + "learning_rate": 1.8192436108427523e-05, + "loss": 0.11002159118652344, + "step": 6679 + }, + { + "epoch": 0.4515344058402055, + "grad_norm": 0.38668930530548096, + "learning_rate": 1.8189212095118752e-05, + "loss": 0.0765838623046875, + "step": 6680 + }, + { + "epoch": 0.45160200081113966, + "grad_norm": 1.0421972274780273, + "learning_rate": 1.8185987927496072e-05, + "loss": 0.16585540771484375, + "step": 6681 + }, + { + "epoch": 0.45166959578207383, + "grad_norm": 0.9647307395935059, + "learning_rate": 1.8182763605715483e-05, + "loss": 0.170684814453125, + "step": 6682 + }, + { + "epoch": 0.451737190753008, + "grad_norm": 0.2835061252117157, + "learning_rate": 1.8179539129933e-05, + "loss": 0.04410552978515625, + "step": 6683 + }, + { + "epoch": 0.4518047857239421, + "grad_norm": 0.6654551029205322, + "learning_rate": 1.8176314500304634e-05, + "loss": 0.10040283203125, + "step": 6684 + }, + { + "epoch": 0.4518723806948763, + "grad_norm": 0.8906107544898987, + "learning_rate": 1.8173089716986417e-05, + "loss": 0.1717529296875, + "step": 6685 + }, + { + "epoch": 0.45193997566581046, + "grad_norm": 1.2976233959197998, + "learning_rate": 1.8169864780134392e-05, + "loss": 0.15677261352539062, + "step": 6686 + }, + { + "epoch": 0.4520075706367446, + "grad_norm": 0.6275020837783813, + "learning_rate": 1.8166639689904598e-05, + "loss": 0.138580322265625, + "step": 6687 + }, + { + "epoch": 0.4520751656076788, + "grad_norm": 0.8724679946899414, + "learning_rate": 1.8163414446453077e-05, + "loss": 0.192840576171875, + "step": 6688 + }, + { + "epoch": 0.45214276057861297, + "grad_norm": 0.6136296987533569, + "learning_rate": 1.8160189049935895e-05, + "loss": 0.08465194702148438, + "step": 6689 + }, + { + "epoch": 0.45221035554954714, + "grad_norm": 0.6993298530578613, + "learning_rate": 1.8156963500509117e-05, + "loss": 0.112823486328125, + "step": 6690 + }, + { + "epoch": 0.45227795052048125, + "grad_norm": 0.3358374536037445, + "learning_rate": 1.8153737798328807e-05, + "loss": 0.049556732177734375, + "step": 6691 + }, + { + "epoch": 0.4523455454914154, + "grad_norm": 0.3068338930606842, + "learning_rate": 1.8150511943551052e-05, + "loss": 0.055507659912109375, + "step": 6692 + }, + { + "epoch": 0.4524131404623496, + "grad_norm": 0.28714612126350403, + "learning_rate": 1.814728593633193e-05, + "loss": 0.0499420166015625, + "step": 6693 + }, + { + "epoch": 0.45248073543328377, + "grad_norm": 0.9184508919715881, + "learning_rate": 1.814405977682755e-05, + "loss": 0.10453033447265625, + "step": 6694 + }, + { + "epoch": 0.45254833040421794, + "grad_norm": 3.2868878841400146, + "learning_rate": 1.8140833465194002e-05, + "loss": 0.16906166076660156, + "step": 6695 + }, + { + "epoch": 0.4526159253751521, + "grad_norm": 0.8448676466941833, + "learning_rate": 1.8137607001587397e-05, + "loss": 0.1473388671875, + "step": 6696 + }, + { + "epoch": 0.4526835203460863, + "grad_norm": 0.2933199107646942, + "learning_rate": 1.8134380386163854e-05, + "loss": 0.04485321044921875, + "step": 6697 + }, + { + "epoch": 0.4527511153170204, + "grad_norm": 0.2535878121852875, + "learning_rate": 1.8131153619079493e-05, + "loss": 0.035854339599609375, + "step": 6698 + }, + { + "epoch": 0.45281871028795456, + "grad_norm": 1.0259759426116943, + "learning_rate": 1.812792670049045e-05, + "loss": 0.14259719848632812, + "step": 6699 + }, + { + "epoch": 0.45288630525888873, + "grad_norm": 0.5148988366127014, + "learning_rate": 1.812469963055286e-05, + "loss": 0.09320831298828125, + "step": 6700 + }, + { + "epoch": 0.4529539002298229, + "grad_norm": 1.0914554595947266, + "learning_rate": 1.8121472409422875e-05, + "loss": 0.2286376953125, + "step": 6701 + }, + { + "epoch": 0.4530214952007571, + "grad_norm": 0.30065983533859253, + "learning_rate": 1.8118245037256634e-05, + "loss": 0.048053741455078125, + "step": 6702 + }, + { + "epoch": 0.45308909017169124, + "grad_norm": 0.1902901977300644, + "learning_rate": 1.8115017514210312e-05, + "loss": 0.031524658203125, + "step": 6703 + }, + { + "epoch": 0.4531566851426254, + "grad_norm": 0.8882989883422852, + "learning_rate": 1.8111789840440068e-05, + "loss": 0.1351318359375, + "step": 6704 + }, + { + "epoch": 0.45322428011355953, + "grad_norm": 0.3556908667087555, + "learning_rate": 1.810856201610208e-05, + "loss": 0.052509307861328125, + "step": 6705 + }, + { + "epoch": 0.4532918750844937, + "grad_norm": 0.5239554643630981, + "learning_rate": 1.810533404135253e-05, + "loss": 0.129974365234375, + "step": 6706 + }, + { + "epoch": 0.45335947005542787, + "grad_norm": 0.7342113852500916, + "learning_rate": 1.8102105916347606e-05, + "loss": 0.1429443359375, + "step": 6707 + }, + { + "epoch": 0.45342706502636204, + "grad_norm": 1.8934701681137085, + "learning_rate": 1.8098877641243505e-05, + "loss": 0.169891357421875, + "step": 6708 + }, + { + "epoch": 0.4534946599972962, + "grad_norm": 0.6242309808731079, + "learning_rate": 1.8095649216196436e-05, + "loss": 0.1573028564453125, + "step": 6709 + }, + { + "epoch": 0.4535622549682304, + "grad_norm": 0.6432151198387146, + "learning_rate": 1.80924206413626e-05, + "loss": 0.169219970703125, + "step": 6710 + }, + { + "epoch": 0.45362984993916455, + "grad_norm": 0.23239587247371674, + "learning_rate": 1.8089191916898225e-05, + "loss": 0.053661346435546875, + "step": 6711 + }, + { + "epoch": 0.45369744491009867, + "grad_norm": 0.9508026838302612, + "learning_rate": 1.8085963042959537e-05, + "loss": 0.1787872314453125, + "step": 6712 + }, + { + "epoch": 0.45376503988103284, + "grad_norm": 0.5122100710868835, + "learning_rate": 1.8082734019702764e-05, + "loss": 0.09996414184570312, + "step": 6713 + }, + { + "epoch": 0.453832634851967, + "grad_norm": 0.5852866768836975, + "learning_rate": 1.8079504847284146e-05, + "loss": 0.1209869384765625, + "step": 6714 + }, + { + "epoch": 0.4539002298229012, + "grad_norm": 0.6968294382095337, + "learning_rate": 1.8076275525859934e-05, + "loss": 0.13120269775390625, + "step": 6715 + }, + { + "epoch": 0.45396782479383535, + "grad_norm": 0.9264609217643738, + "learning_rate": 1.807304605558638e-05, + "loss": 0.1564483642578125, + "step": 6716 + }, + { + "epoch": 0.4540354197647695, + "grad_norm": 0.2610769271850586, + "learning_rate": 1.8069816436619746e-05, + "loss": 0.049591064453125, + "step": 6717 + }, + { + "epoch": 0.45410301473570364, + "grad_norm": 0.17297931015491486, + "learning_rate": 1.80665866691163e-05, + "loss": 0.03453826904296875, + "step": 6718 + }, + { + "epoch": 0.4541706097066378, + "grad_norm": 0.33320343494415283, + "learning_rate": 1.8063356753232326e-05, + "loss": 0.06171417236328125, + "step": 6719 + }, + { + "epoch": 0.454238204677572, + "grad_norm": 0.8694645762443542, + "learning_rate": 1.80601266891241e-05, + "loss": 0.1166229248046875, + "step": 6720 + }, + { + "epoch": 0.45430579964850615, + "grad_norm": 0.5632918477058411, + "learning_rate": 1.805689647694791e-05, + "loss": 0.0800323486328125, + "step": 6721 + }, + { + "epoch": 0.4543733946194403, + "grad_norm": 0.16161909699440002, + "learning_rate": 1.805366611686006e-05, + "loss": 0.0351409912109375, + "step": 6722 + }, + { + "epoch": 0.4544409895903745, + "grad_norm": 0.20587225258350372, + "learning_rate": 1.805043560901685e-05, + "loss": 0.0487518310546875, + "step": 6723 + }, + { + "epoch": 0.45450858456130866, + "grad_norm": 0.546423614025116, + "learning_rate": 1.8047204953574598e-05, + "loss": 0.12542724609375, + "step": 6724 + }, + { + "epoch": 0.4545761795322428, + "grad_norm": 0.5804012417793274, + "learning_rate": 1.8043974150689622e-05, + "loss": 0.1161346435546875, + "step": 6725 + }, + { + "epoch": 0.45464377450317695, + "grad_norm": 0.6282299757003784, + "learning_rate": 1.8040743200518244e-05, + "loss": 0.1136016845703125, + "step": 6726 + }, + { + "epoch": 0.4547113694741111, + "grad_norm": 0.7449626326560974, + "learning_rate": 1.8037512103216796e-05, + "loss": 0.193634033203125, + "step": 6727 + }, + { + "epoch": 0.4547789644450453, + "grad_norm": 0.32707834243774414, + "learning_rate": 1.803428085894163e-05, + "loss": 0.0597381591796875, + "step": 6728 + }, + { + "epoch": 0.45484655941597946, + "grad_norm": 0.4244785010814667, + "learning_rate": 1.8031049467849082e-05, + "loss": 0.0876922607421875, + "step": 6729 + }, + { + "epoch": 0.4549141543869136, + "grad_norm": 0.6725302338600159, + "learning_rate": 1.802781793009551e-05, + "loss": 0.16827392578125, + "step": 6730 + }, + { + "epoch": 0.4549817493578478, + "grad_norm": 0.13532358407974243, + "learning_rate": 1.8024586245837274e-05, + "loss": 0.0254058837890625, + "step": 6731 + }, + { + "epoch": 0.4550493443287819, + "grad_norm": 0.35201704502105713, + "learning_rate": 1.802135441523075e-05, + "loss": 0.05971527099609375, + "step": 6732 + }, + { + "epoch": 0.4551169392997161, + "grad_norm": 0.7618640661239624, + "learning_rate": 1.801812243843231e-05, + "loss": 0.11322021484375, + "step": 6733 + }, + { + "epoch": 0.45518453427065025, + "grad_norm": 0.5780993103981018, + "learning_rate": 1.8014890315598335e-05, + "loss": 0.08080291748046875, + "step": 6734 + }, + { + "epoch": 0.4552521292415844, + "grad_norm": 0.5041376352310181, + "learning_rate": 1.8011658046885223e-05, + "loss": 0.09698486328125, + "step": 6735 + }, + { + "epoch": 0.4553197242125186, + "grad_norm": 0.981499195098877, + "learning_rate": 1.8008425632449354e-05, + "loss": 0.1806640625, + "step": 6736 + }, + { + "epoch": 0.45538731918345277, + "grad_norm": 0.6222550868988037, + "learning_rate": 1.8005193072447157e-05, + "loss": 0.0767974853515625, + "step": 6737 + }, + { + "epoch": 0.45545491415438694, + "grad_norm": 0.5700080990791321, + "learning_rate": 1.8001960367035024e-05, + "loss": 0.0974273681640625, + "step": 6738 + }, + { + "epoch": 0.45552250912532105, + "grad_norm": 0.8699584007263184, + "learning_rate": 1.7998727516369375e-05, + "loss": 0.174224853515625, + "step": 6739 + }, + { + "epoch": 0.4555901040962552, + "grad_norm": 0.3741641938686371, + "learning_rate": 1.799549452060664e-05, + "loss": 0.05718231201171875, + "step": 6740 + }, + { + "epoch": 0.4556576990671894, + "grad_norm": 1.1665598154067993, + "learning_rate": 1.799226137990326e-05, + "loss": 0.180328369140625, + "step": 6741 + }, + { + "epoch": 0.45572529403812356, + "grad_norm": 0.17014390230178833, + "learning_rate": 1.798902809441566e-05, + "loss": 0.02812957763671875, + "step": 6742 + }, + { + "epoch": 0.45579288900905773, + "grad_norm": 0.5180891156196594, + "learning_rate": 1.798579466430029e-05, + "loss": 0.10010528564453125, + "step": 6743 + }, + { + "epoch": 0.4558604839799919, + "grad_norm": 0.3410394489765167, + "learning_rate": 1.798256108971361e-05, + "loss": 0.049785614013671875, + "step": 6744 + }, + { + "epoch": 0.4559280789509261, + "grad_norm": 1.2703955173492432, + "learning_rate": 1.7979327370812074e-05, + "loss": 0.1159820556640625, + "step": 6745 + }, + { + "epoch": 0.4559956739218602, + "grad_norm": 0.7997536659240723, + "learning_rate": 1.7976093507752155e-05, + "loss": 0.11838531494140625, + "step": 6746 + }, + { + "epoch": 0.45606326889279436, + "grad_norm": 0.972923219203949, + "learning_rate": 1.797285950069032e-05, + "loss": 0.19708251953125, + "step": 6747 + }, + { + "epoch": 0.45613086386372853, + "grad_norm": 0.700322151184082, + "learning_rate": 1.7969625349783052e-05, + "loss": 0.131927490234375, + "step": 6748 + }, + { + "epoch": 0.4561984588346627, + "grad_norm": 0.7199207544326782, + "learning_rate": 1.796639105518685e-05, + "loss": 0.11808013916015625, + "step": 6749 + }, + { + "epoch": 0.45626605380559687, + "grad_norm": 1.6330599784851074, + "learning_rate": 1.7963156617058197e-05, + "loss": 0.15083694458007812, + "step": 6750 + }, + { + "epoch": 0.45633364877653104, + "grad_norm": 0.3410293161869049, + "learning_rate": 1.79599220355536e-05, + "loss": 0.052532196044921875, + "step": 6751 + }, + { + "epoch": 0.4564012437474652, + "grad_norm": 0.23756591975688934, + "learning_rate": 1.7956687310829563e-05, + "loss": 0.0315093994140625, + "step": 6752 + }, + { + "epoch": 0.45646883871839933, + "grad_norm": 0.29032954573631287, + "learning_rate": 1.7953452443042605e-05, + "loss": 0.031673431396484375, + "step": 6753 + }, + { + "epoch": 0.4565364336893335, + "grad_norm": 0.3368605077266693, + "learning_rate": 1.7950217432349258e-05, + "loss": 0.053089141845703125, + "step": 6754 + }, + { + "epoch": 0.45660402866026767, + "grad_norm": 0.708538293838501, + "learning_rate": 1.794698227890604e-05, + "loss": 0.1151580810546875, + "step": 6755 + }, + { + "epoch": 0.45667162363120184, + "grad_norm": 1.2327543497085571, + "learning_rate": 1.794374698286949e-05, + "loss": 0.11453056335449219, + "step": 6756 + }, + { + "epoch": 0.456739218602136, + "grad_norm": 1.3005037307739258, + "learning_rate": 1.7940511544396158e-05, + "loss": 0.13865280151367188, + "step": 6757 + }, + { + "epoch": 0.4568068135730702, + "grad_norm": 0.5113822221755981, + "learning_rate": 1.793727596364259e-05, + "loss": 0.107818603515625, + "step": 6758 + }, + { + "epoch": 0.45687440854400435, + "grad_norm": 1.1950658559799194, + "learning_rate": 1.793404024076535e-05, + "loss": 0.1846771240234375, + "step": 6759 + }, + { + "epoch": 0.45694200351493847, + "grad_norm": 0.4637773633003235, + "learning_rate": 1.793080437592099e-05, + "loss": 0.09203720092773438, + "step": 6760 + }, + { + "epoch": 0.45700959848587264, + "grad_norm": 0.7889136672019958, + "learning_rate": 1.7927568369266087e-05, + "loss": 0.18109130859375, + "step": 6761 + }, + { + "epoch": 0.4570771934568068, + "grad_norm": 0.520566463470459, + "learning_rate": 1.7924332220957223e-05, + "loss": 0.0569915771484375, + "step": 6762 + }, + { + "epoch": 0.457144788427741, + "grad_norm": 0.22358949482440948, + "learning_rate": 1.792109593115098e-05, + "loss": 0.03469276428222656, + "step": 6763 + }, + { + "epoch": 0.45721238339867515, + "grad_norm": 0.3450626730918884, + "learning_rate": 1.7917859500003943e-05, + "loss": 0.1033172607421875, + "step": 6764 + }, + { + "epoch": 0.4572799783696093, + "grad_norm": 0.8601443767547607, + "learning_rate": 1.7914622927672723e-05, + "loss": 0.12554550170898438, + "step": 6765 + }, + { + "epoch": 0.4573475733405435, + "grad_norm": 2.756338119506836, + "learning_rate": 1.791138621431392e-05, + "loss": 0.21872711181640625, + "step": 6766 + }, + { + "epoch": 0.4574151683114776, + "grad_norm": 1.042504906654358, + "learning_rate": 1.7908149360084148e-05, + "loss": 0.16448974609375, + "step": 6767 + }, + { + "epoch": 0.4574827632824118, + "grad_norm": 0.8850703239440918, + "learning_rate": 1.790491236514002e-05, + "loss": 0.180419921875, + "step": 6768 + }, + { + "epoch": 0.45755035825334595, + "grad_norm": 0.5918729305267334, + "learning_rate": 1.7901675229638167e-05, + "loss": 0.095703125, + "step": 6769 + }, + { + "epoch": 0.4576179532242801, + "grad_norm": 0.9719499945640564, + "learning_rate": 1.789843795373522e-05, + "loss": 0.21917724609375, + "step": 6770 + }, + { + "epoch": 0.4576855481952143, + "grad_norm": 0.7571773529052734, + "learning_rate": 1.7895200537587825e-05, + "loss": 0.1128082275390625, + "step": 6771 + }, + { + "epoch": 0.45775314316614846, + "grad_norm": 0.681938111782074, + "learning_rate": 1.789196298135262e-05, + "loss": 0.09943008422851562, + "step": 6772 + }, + { + "epoch": 0.45782073813708263, + "grad_norm": 1.1062580347061157, + "learning_rate": 1.788872528518626e-05, + "loss": 0.224700927734375, + "step": 6773 + }, + { + "epoch": 0.45788833310801674, + "grad_norm": 0.41900381445884705, + "learning_rate": 1.788548744924541e-05, + "loss": 0.07616043090820312, + "step": 6774 + }, + { + "epoch": 0.4579559280789509, + "grad_norm": 0.49964165687561035, + "learning_rate": 1.7882249473686727e-05, + "loss": 0.11114501953125, + "step": 6775 + }, + { + "epoch": 0.4580235230498851, + "grad_norm": 0.45599520206451416, + "learning_rate": 1.7879011358666895e-05, + "loss": 0.0969390869140625, + "step": 6776 + }, + { + "epoch": 0.45809111802081925, + "grad_norm": 1.5644530057907104, + "learning_rate": 1.7875773104342586e-05, + "loss": 0.222930908203125, + "step": 6777 + }, + { + "epoch": 0.4581587129917534, + "grad_norm": 0.3793744742870331, + "learning_rate": 1.787253471087049e-05, + "loss": 0.0746917724609375, + "step": 6778 + }, + { + "epoch": 0.4582263079626876, + "grad_norm": 0.4699075520038605, + "learning_rate": 1.78692961784073e-05, + "loss": 0.09708404541015625, + "step": 6779 + }, + { + "epoch": 0.45829390293362177, + "grad_norm": 0.8154213428497314, + "learning_rate": 1.786605750710972e-05, + "loss": 0.187744140625, + "step": 6780 + }, + { + "epoch": 0.4583614979045559, + "grad_norm": 0.45162761211395264, + "learning_rate": 1.7862818697134452e-05, + "loss": 0.0990447998046875, + "step": 6781 + }, + { + "epoch": 0.45842909287549005, + "grad_norm": 1.2221556901931763, + "learning_rate": 1.785957974863821e-05, + "loss": 0.1870269775390625, + "step": 6782 + }, + { + "epoch": 0.4584966878464242, + "grad_norm": 0.3792955279350281, + "learning_rate": 1.785634066177772e-05, + "loss": 0.06347274780273438, + "step": 6783 + }, + { + "epoch": 0.4585642828173584, + "grad_norm": 0.478519469499588, + "learning_rate": 1.7853101436709706e-05, + "loss": 0.073394775390625, + "step": 6784 + }, + { + "epoch": 0.45863187778829256, + "grad_norm": 0.5635616779327393, + "learning_rate": 1.7849862073590896e-05, + "loss": 0.058444976806640625, + "step": 6785 + }, + { + "epoch": 0.45869947275922673, + "grad_norm": 0.6575496196746826, + "learning_rate": 1.7846622572578036e-05, + "loss": 0.088531494140625, + "step": 6786 + }, + { + "epoch": 0.45876706773016085, + "grad_norm": 1.431217074394226, + "learning_rate": 1.7843382933827878e-05, + "loss": 0.16741371154785156, + "step": 6787 + }, + { + "epoch": 0.458834662701095, + "grad_norm": 0.40747547149658203, + "learning_rate": 1.7840143157497167e-05, + "loss": 0.08789825439453125, + "step": 6788 + }, + { + "epoch": 0.4589022576720292, + "grad_norm": 0.6234543323516846, + "learning_rate": 1.783690324374267e-05, + "loss": 0.1158905029296875, + "step": 6789 + }, + { + "epoch": 0.45896985264296336, + "grad_norm": 0.6183943748474121, + "learning_rate": 1.7833663192721148e-05, + "loss": 0.1429595947265625, + "step": 6790 + }, + { + "epoch": 0.45903744761389753, + "grad_norm": 0.27190151810646057, + "learning_rate": 1.783042300458938e-05, + "loss": 0.03858184814453125, + "step": 6791 + }, + { + "epoch": 0.4591050425848317, + "grad_norm": 0.4453916549682617, + "learning_rate": 1.7827182679504147e-05, + "loss": 0.08382415771484375, + "step": 6792 + }, + { + "epoch": 0.4591726375557659, + "grad_norm": 0.34343594312667847, + "learning_rate": 1.7823942217622235e-05, + "loss": 0.06909561157226562, + "step": 6793 + }, + { + "epoch": 0.4592402325267, + "grad_norm": 0.18700554966926575, + "learning_rate": 1.7820701619100426e-05, + "loss": 0.032070159912109375, + "step": 6794 + }, + { + "epoch": 0.45930782749763416, + "grad_norm": 0.2503042221069336, + "learning_rate": 1.7817460884095545e-05, + "loss": 0.07399749755859375, + "step": 6795 + }, + { + "epoch": 0.45937542246856833, + "grad_norm": 0.7538093328475952, + "learning_rate": 1.7814220012764378e-05, + "loss": 0.1715087890625, + "step": 6796 + }, + { + "epoch": 0.4594430174395025, + "grad_norm": 0.5960124731063843, + "learning_rate": 1.781097900526375e-05, + "loss": 0.144256591796875, + "step": 6797 + }, + { + "epoch": 0.45951061241043667, + "grad_norm": 0.45326000452041626, + "learning_rate": 1.7807737861750467e-05, + "loss": 0.1008148193359375, + "step": 6798 + }, + { + "epoch": 0.45957820738137084, + "grad_norm": 0.5117151737213135, + "learning_rate": 1.780449658238137e-05, + "loss": 0.1304473876953125, + "step": 6799 + }, + { + "epoch": 0.459645802352305, + "grad_norm": 0.3005441427230835, + "learning_rate": 1.7801255167313292e-05, + "loss": 0.04965972900390625, + "step": 6800 + }, + { + "epoch": 0.4597133973232391, + "grad_norm": 0.23623937368392944, + "learning_rate": 1.7798013616703063e-05, + "loss": 0.04064178466796875, + "step": 6801 + }, + { + "epoch": 0.4597809922941733, + "grad_norm": 0.7873181700706482, + "learning_rate": 1.779477193070753e-05, + "loss": 0.14025115966796875, + "step": 6802 + }, + { + "epoch": 0.45984858726510747, + "grad_norm": 0.34923216700553894, + "learning_rate": 1.7791530109483558e-05, + "loss": 0.062164306640625, + "step": 6803 + }, + { + "epoch": 0.45991618223604164, + "grad_norm": 0.20114129781723022, + "learning_rate": 1.7788288153187997e-05, + "loss": 0.03874969482421875, + "step": 6804 + }, + { + "epoch": 0.4599837772069758, + "grad_norm": 0.40289589762687683, + "learning_rate": 1.7785046061977715e-05, + "loss": 0.0707244873046875, + "step": 6805 + }, + { + "epoch": 0.46005137217791, + "grad_norm": 0.3227349817752838, + "learning_rate": 1.7781803836009587e-05, + "loss": 0.0552520751953125, + "step": 6806 + }, + { + "epoch": 0.46011896714884415, + "grad_norm": 2.015052080154419, + "learning_rate": 1.777856147544049e-05, + "loss": 0.2495269775390625, + "step": 6807 + }, + { + "epoch": 0.46018656211977826, + "grad_norm": 1.1963330507278442, + "learning_rate": 1.7775318980427302e-05, + "loss": 0.10994720458984375, + "step": 6808 + }, + { + "epoch": 0.46025415709071243, + "grad_norm": 0.7425453662872314, + "learning_rate": 1.777207635112693e-05, + "loss": 0.12240219116210938, + "step": 6809 + }, + { + "epoch": 0.4603217520616466, + "grad_norm": 0.29609760642051697, + "learning_rate": 1.776883358769626e-05, + "loss": 0.05267333984375, + "step": 6810 + }, + { + "epoch": 0.4603893470325808, + "grad_norm": 0.3229462206363678, + "learning_rate": 1.77655906902922e-05, + "loss": 0.0540313720703125, + "step": 6811 + }, + { + "epoch": 0.46045694200351495, + "grad_norm": 1.673941969871521, + "learning_rate": 1.7762347659071667e-05, + "loss": 0.194793701171875, + "step": 6812 + }, + { + "epoch": 0.4605245369744491, + "grad_norm": 0.7988953590393066, + "learning_rate": 1.7759104494191576e-05, + "loss": 0.1207733154296875, + "step": 6813 + }, + { + "epoch": 0.4605921319453833, + "grad_norm": 1.04958975315094, + "learning_rate": 1.7755861195808847e-05, + "loss": 0.180023193359375, + "step": 6814 + }, + { + "epoch": 0.4606597269163174, + "grad_norm": 0.9674664735794067, + "learning_rate": 1.775261776408042e-05, + "loss": 0.1314849853515625, + "step": 6815 + }, + { + "epoch": 0.4607273218872516, + "grad_norm": 0.4300783574581146, + "learning_rate": 1.774937419916322e-05, + "loss": 0.0725860595703125, + "step": 6816 + }, + { + "epoch": 0.46079491685818574, + "grad_norm": 0.5259460806846619, + "learning_rate": 1.7746130501214208e-05, + "loss": 0.109100341796875, + "step": 6817 + }, + { + "epoch": 0.4608625118291199, + "grad_norm": 0.6951059103012085, + "learning_rate": 1.774288667039032e-05, + "loss": 0.11759185791015625, + "step": 6818 + }, + { + "epoch": 0.4609301068000541, + "grad_norm": 0.837063729763031, + "learning_rate": 1.7739642706848517e-05, + "loss": 0.12047576904296875, + "step": 6819 + }, + { + "epoch": 0.46099770177098826, + "grad_norm": 0.3674776554107666, + "learning_rate": 1.773639861074576e-05, + "loss": 0.0672149658203125, + "step": 6820 + }, + { + "epoch": 0.4610652967419224, + "grad_norm": 0.521835207939148, + "learning_rate": 1.7733154382239022e-05, + "loss": 0.12343597412109375, + "step": 6821 + }, + { + "epoch": 0.46113289171285654, + "grad_norm": 1.0089378356933594, + "learning_rate": 1.772991002148528e-05, + "loss": 0.13665771484375, + "step": 6822 + }, + { + "epoch": 0.4612004866837907, + "grad_norm": 0.9770398736000061, + "learning_rate": 1.772666552864151e-05, + "loss": 0.157806396484375, + "step": 6823 + }, + { + "epoch": 0.4612680816547249, + "grad_norm": 0.8712926506996155, + "learning_rate": 1.7723420903864706e-05, + "loss": 0.137115478515625, + "step": 6824 + }, + { + "epoch": 0.46133567662565905, + "grad_norm": 0.8307680487632751, + "learning_rate": 1.7720176147311865e-05, + "loss": 0.11981201171875, + "step": 6825 + }, + { + "epoch": 0.4614032715965932, + "grad_norm": 0.43837109208106995, + "learning_rate": 1.771693125913998e-05, + "loss": 0.0970306396484375, + "step": 6826 + }, + { + "epoch": 0.4614708665675274, + "grad_norm": 0.5398200154304504, + "learning_rate": 1.771368623950607e-05, + "loss": 0.11260223388671875, + "step": 6827 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 0.5112385153770447, + "learning_rate": 1.7710441088567135e-05, + "loss": 0.10344314575195312, + "step": 6828 + }, + { + "epoch": 0.4616060565093957, + "grad_norm": 1.0750999450683594, + "learning_rate": 1.7707195806480213e-05, + "loss": 0.15350341796875, + "step": 6829 + }, + { + "epoch": 0.46167365148032985, + "grad_norm": 0.6180310249328613, + "learning_rate": 1.770395039340232e-05, + "loss": 0.11066436767578125, + "step": 6830 + }, + { + "epoch": 0.461741246451264, + "grad_norm": 0.2595658004283905, + "learning_rate": 1.770070484949049e-05, + "loss": 0.0467071533203125, + "step": 6831 + }, + { + "epoch": 0.4618088414221982, + "grad_norm": 0.5435789823532104, + "learning_rate": 1.7697459174901766e-05, + "loss": 0.0953521728515625, + "step": 6832 + }, + { + "epoch": 0.46187643639313236, + "grad_norm": 0.9485741853713989, + "learning_rate": 1.769421336979319e-05, + "loss": 0.14840316772460938, + "step": 6833 + }, + { + "epoch": 0.46194403136406653, + "grad_norm": 0.4003318250179291, + "learning_rate": 1.7690967434321812e-05, + "loss": 0.0894775390625, + "step": 6834 + }, + { + "epoch": 0.4620116263350007, + "grad_norm": 0.5909557342529297, + "learning_rate": 1.76877213686447e-05, + "loss": 0.09322357177734375, + "step": 6835 + }, + { + "epoch": 0.4620792213059348, + "grad_norm": 0.30571770668029785, + "learning_rate": 1.768447517291891e-05, + "loss": 0.037174224853515625, + "step": 6836 + }, + { + "epoch": 0.462146816276869, + "grad_norm": 0.9745163917541504, + "learning_rate": 1.768122884730152e-05, + "loss": 0.18101119995117188, + "step": 6837 + }, + { + "epoch": 0.46221441124780316, + "grad_norm": 0.6310850977897644, + "learning_rate": 1.7677982391949602e-05, + "loss": 0.10707855224609375, + "step": 6838 + }, + { + "epoch": 0.46228200621873733, + "grad_norm": 0.2174716740846634, + "learning_rate": 1.7674735807020244e-05, + "loss": 0.038921356201171875, + "step": 6839 + }, + { + "epoch": 0.4623496011896715, + "grad_norm": 0.5306812524795532, + "learning_rate": 1.7671489092670526e-05, + "loss": 0.0917510986328125, + "step": 6840 + }, + { + "epoch": 0.46241719616060567, + "grad_norm": 0.5010942220687866, + "learning_rate": 1.766824224905756e-05, + "loss": 0.0809478759765625, + "step": 6841 + }, + { + "epoch": 0.46248479113153984, + "grad_norm": 1.4941580295562744, + "learning_rate": 1.7664995276338442e-05, + "loss": 0.169464111328125, + "step": 6842 + }, + { + "epoch": 0.46255238610247396, + "grad_norm": 0.2681574821472168, + "learning_rate": 1.7661748174670274e-05, + "loss": 0.05765533447265625, + "step": 6843 + }, + { + "epoch": 0.4626199810734081, + "grad_norm": 0.2605505585670471, + "learning_rate": 1.7658500944210175e-05, + "loss": 0.060924530029296875, + "step": 6844 + }, + { + "epoch": 0.4626875760443423, + "grad_norm": 0.802749752998352, + "learning_rate": 1.765525358511527e-05, + "loss": 0.12653350830078125, + "step": 6845 + }, + { + "epoch": 0.46275517101527647, + "grad_norm": 0.4702020287513733, + "learning_rate": 1.7652006097542684e-05, + "loss": 0.07666015625, + "step": 6846 + }, + { + "epoch": 0.46282276598621064, + "grad_norm": 0.8784064650535583, + "learning_rate": 1.764875848164955e-05, + "loss": 0.1462554931640625, + "step": 6847 + }, + { + "epoch": 0.4628903609571448, + "grad_norm": 1.1019412279129028, + "learning_rate": 1.7645510737593004e-05, + "loss": 0.177032470703125, + "step": 6848 + }, + { + "epoch": 0.4629579559280789, + "grad_norm": 1.1128575801849365, + "learning_rate": 1.7642262865530204e-05, + "loss": 0.191314697265625, + "step": 6849 + }, + { + "epoch": 0.4630255508990131, + "grad_norm": 0.5321345329284668, + "learning_rate": 1.763901486561829e-05, + "loss": 0.11505126953125, + "step": 6850 + }, + { + "epoch": 0.46309314586994726, + "grad_norm": 1.1692856550216675, + "learning_rate": 1.763576673801442e-05, + "loss": 0.17938232421875, + "step": 6851 + }, + { + "epoch": 0.46316074084088144, + "grad_norm": 0.269697904586792, + "learning_rate": 1.7632518482875774e-05, + "loss": 0.04279327392578125, + "step": 6852 + }, + { + "epoch": 0.4632283358118156, + "grad_norm": 0.3334434926509857, + "learning_rate": 1.76292701003595e-05, + "loss": 0.05884552001953125, + "step": 6853 + }, + { + "epoch": 0.4632959307827498, + "grad_norm": 0.9990001916885376, + "learning_rate": 1.76260215906228e-05, + "loss": 0.1386566162109375, + "step": 6854 + }, + { + "epoch": 0.46336352575368395, + "grad_norm": 0.29655158519744873, + "learning_rate": 1.7622772953822843e-05, + "loss": 0.061672210693359375, + "step": 6855 + }, + { + "epoch": 0.46343112072461806, + "grad_norm": 0.5570993423461914, + "learning_rate": 1.761952419011681e-05, + "loss": 0.1380615234375, + "step": 6856 + }, + { + "epoch": 0.46349871569555223, + "grad_norm": 0.3463667929172516, + "learning_rate": 1.761627529966191e-05, + "loss": 0.06934356689453125, + "step": 6857 + }, + { + "epoch": 0.4635663106664864, + "grad_norm": 0.7161250710487366, + "learning_rate": 1.7613026282615345e-05, + "loss": 0.096588134765625, + "step": 6858 + }, + { + "epoch": 0.4636339056374206, + "grad_norm": 0.9080382585525513, + "learning_rate": 1.760977713913432e-05, + "loss": 0.16234588623046875, + "step": 6859 + }, + { + "epoch": 0.46370150060835474, + "grad_norm": 0.7357428073883057, + "learning_rate": 1.760652786937604e-05, + "loss": 0.10186004638671875, + "step": 6860 + }, + { + "epoch": 0.4637690955792889, + "grad_norm": 0.3459053635597229, + "learning_rate": 1.760327847349773e-05, + "loss": 0.062313079833984375, + "step": 6861 + }, + { + "epoch": 0.4638366905502231, + "grad_norm": 1.4126182794570923, + "learning_rate": 1.7600028951656625e-05, + "loss": 0.225128173828125, + "step": 6862 + }, + { + "epoch": 0.4639042855211572, + "grad_norm": 0.41551294922828674, + "learning_rate": 1.7596779304009947e-05, + "loss": 0.05698394775390625, + "step": 6863 + }, + { + "epoch": 0.46397188049209137, + "grad_norm": 0.7434740662574768, + "learning_rate": 1.7593529530714937e-05, + "loss": 0.1570587158203125, + "step": 6864 + }, + { + "epoch": 0.46403947546302554, + "grad_norm": 0.3147088587284088, + "learning_rate": 1.7590279631928838e-05, + "loss": 0.06893157958984375, + "step": 6865 + }, + { + "epoch": 0.4641070704339597, + "grad_norm": 0.5804906487464905, + "learning_rate": 1.7587029607808907e-05, + "loss": 0.08758544921875, + "step": 6866 + }, + { + "epoch": 0.4641746654048939, + "grad_norm": 0.18705476820468903, + "learning_rate": 1.758377945851239e-05, + "loss": 0.03633880615234375, + "step": 6867 + }, + { + "epoch": 0.46424226037582805, + "grad_norm": 0.39132386445999146, + "learning_rate": 1.7580529184196555e-05, + "loss": 0.071868896484375, + "step": 6868 + }, + { + "epoch": 0.4643098553467622, + "grad_norm": 0.6672486066818237, + "learning_rate": 1.757727878501867e-05, + "loss": 0.16650390625, + "step": 6869 + }, + { + "epoch": 0.46437745031769634, + "grad_norm": 0.6339625716209412, + "learning_rate": 1.7574028261136007e-05, + "loss": 0.0807952880859375, + "step": 6870 + }, + { + "epoch": 0.4644450452886305, + "grad_norm": 1.1868047714233398, + "learning_rate": 1.7570777612705855e-05, + "loss": 0.176025390625, + "step": 6871 + }, + { + "epoch": 0.4645126402595647, + "grad_norm": 0.49213388562202454, + "learning_rate": 1.7567526839885492e-05, + "loss": 0.12835693359375, + "step": 6872 + }, + { + "epoch": 0.46458023523049885, + "grad_norm": 0.1486242264509201, + "learning_rate": 1.7564275942832213e-05, + "loss": 0.022815704345703125, + "step": 6873 + }, + { + "epoch": 0.464647830201433, + "grad_norm": 0.5426498055458069, + "learning_rate": 1.7561024921703312e-05, + "loss": 0.1175384521484375, + "step": 6874 + }, + { + "epoch": 0.4647154251723672, + "grad_norm": 0.5717114806175232, + "learning_rate": 1.7557773776656103e-05, + "loss": 0.1122589111328125, + "step": 6875 + }, + { + "epoch": 0.46478302014330136, + "grad_norm": 0.2479540854692459, + "learning_rate": 1.7554522507847893e-05, + "loss": 0.0479888916015625, + "step": 6876 + }, + { + "epoch": 0.4648506151142355, + "grad_norm": 0.8224608302116394, + "learning_rate": 1.7551271115436e-05, + "loss": 0.1145782470703125, + "step": 6877 + }, + { + "epoch": 0.46491821008516965, + "grad_norm": 2.160412311553955, + "learning_rate": 1.754801959957774e-05, + "loss": 0.31597900390625, + "step": 6878 + }, + { + "epoch": 0.4649858050561038, + "grad_norm": 0.19258247315883636, + "learning_rate": 1.7544767960430447e-05, + "loss": 0.021932601928710938, + "step": 6879 + }, + { + "epoch": 0.465053400027038, + "grad_norm": 0.4222341775894165, + "learning_rate": 1.7541516198151457e-05, + "loss": 0.06804656982421875, + "step": 6880 + }, + { + "epoch": 0.46512099499797216, + "grad_norm": 1.5753138065338135, + "learning_rate": 1.7538264312898104e-05, + "loss": 0.21807861328125, + "step": 6881 + }, + { + "epoch": 0.46518858996890633, + "grad_norm": 0.46091508865356445, + "learning_rate": 1.7535012304827737e-05, + "loss": 0.08257293701171875, + "step": 6882 + }, + { + "epoch": 0.4652561849398405, + "grad_norm": 0.8760426640510559, + "learning_rate": 1.753176017409771e-05, + "loss": 0.178985595703125, + "step": 6883 + }, + { + "epoch": 0.4653237799107746, + "grad_norm": 0.6701269745826721, + "learning_rate": 1.7528507920865386e-05, + "loss": 0.1076812744140625, + "step": 6884 + }, + { + "epoch": 0.4653913748817088, + "grad_norm": 0.5343390107154846, + "learning_rate": 1.752525554528812e-05, + "loss": 0.0859222412109375, + "step": 6885 + }, + { + "epoch": 0.46545896985264296, + "grad_norm": 0.2804040014743805, + "learning_rate": 1.7522003047523286e-05, + "loss": 0.03480339050292969, + "step": 6886 + }, + { + "epoch": 0.4655265648235771, + "grad_norm": 0.31605443358421326, + "learning_rate": 1.751875042772826e-05, + "loss": 0.05127906799316406, + "step": 6887 + }, + { + "epoch": 0.4655941597945113, + "grad_norm": 0.6848072409629822, + "learning_rate": 1.7515497686060428e-05, + "loss": 0.1346435546875, + "step": 6888 + }, + { + "epoch": 0.46566175476544547, + "grad_norm": 0.7307696342468262, + "learning_rate": 1.7512244822677174e-05, + "loss": 0.16558837890625, + "step": 6889 + }, + { + "epoch": 0.46572934973637964, + "grad_norm": 1.4377161264419556, + "learning_rate": 1.750899183773589e-05, + "loss": 0.27777099609375, + "step": 6890 + }, + { + "epoch": 0.46579694470731375, + "grad_norm": 0.8070635795593262, + "learning_rate": 1.7505738731393978e-05, + "loss": 0.14972686767578125, + "step": 6891 + }, + { + "epoch": 0.4658645396782479, + "grad_norm": 0.5197877287864685, + "learning_rate": 1.7502485503808844e-05, + "loss": 0.08792495727539062, + "step": 6892 + }, + { + "epoch": 0.4659321346491821, + "grad_norm": 0.41959095001220703, + "learning_rate": 1.7499232155137897e-05, + "loss": 0.0802459716796875, + "step": 6893 + }, + { + "epoch": 0.46599972962011627, + "grad_norm": 0.1919713020324707, + "learning_rate": 1.7495978685538553e-05, + "loss": 0.040531158447265625, + "step": 6894 + }, + { + "epoch": 0.46606732459105044, + "grad_norm": 0.41505542397499084, + "learning_rate": 1.749272509516824e-05, + "loss": 0.092498779296875, + "step": 6895 + }, + { + "epoch": 0.4661349195619846, + "grad_norm": 0.2303726077079773, + "learning_rate": 1.748947138418439e-05, + "loss": 0.035457611083984375, + "step": 6896 + }, + { + "epoch": 0.4662025145329188, + "grad_norm": 0.817368745803833, + "learning_rate": 1.7486217552744426e-05, + "loss": 0.138671875, + "step": 6897 + }, + { + "epoch": 0.4662701095038529, + "grad_norm": 0.7278552055358887, + "learning_rate": 1.7482963601005798e-05, + "loss": 0.146514892578125, + "step": 6898 + }, + { + "epoch": 0.46633770447478706, + "grad_norm": 0.9181599020957947, + "learning_rate": 1.7479709529125947e-05, + "loss": 0.192108154296875, + "step": 6899 + }, + { + "epoch": 0.46640529944572123, + "grad_norm": 0.3356773257255554, + "learning_rate": 1.7476455337262334e-05, + "loss": 0.06578826904296875, + "step": 6900 + }, + { + "epoch": 0.4664728944166554, + "grad_norm": 0.20475517213344574, + "learning_rate": 1.747320102557241e-05, + "loss": 0.027065277099609375, + "step": 6901 + }, + { + "epoch": 0.4665404893875896, + "grad_norm": 0.507064700126648, + "learning_rate": 1.7469946594213632e-05, + "loss": 0.124725341796875, + "step": 6902 + }, + { + "epoch": 0.46660808435852374, + "grad_norm": 0.1960601806640625, + "learning_rate": 1.7466692043343484e-05, + "loss": 0.039897918701171875, + "step": 6903 + }, + { + "epoch": 0.4666756793294579, + "grad_norm": 0.5166399478912354, + "learning_rate": 1.746343737311943e-05, + "loss": 0.178863525390625, + "step": 6904 + }, + { + "epoch": 0.46674327430039203, + "grad_norm": 1.3059558868408203, + "learning_rate": 1.7460182583698962e-05, + "loss": 0.25408935546875, + "step": 6905 + }, + { + "epoch": 0.4668108692713262, + "grad_norm": 1.2750200033187866, + "learning_rate": 1.745692767523956e-05, + "loss": 0.223846435546875, + "step": 6906 + }, + { + "epoch": 0.46687846424226037, + "grad_norm": 0.9481651186943054, + "learning_rate": 1.7453672647898712e-05, + "loss": 0.192962646484375, + "step": 6907 + }, + { + "epoch": 0.46694605921319454, + "grad_norm": 0.435741662979126, + "learning_rate": 1.7450417501833924e-05, + "loss": 0.11273193359375, + "step": 6908 + }, + { + "epoch": 0.4670136541841287, + "grad_norm": 0.6817866563796997, + "learning_rate": 1.74471622372027e-05, + "loss": 0.143341064453125, + "step": 6909 + }, + { + "epoch": 0.4670812491550629, + "grad_norm": 0.4992871880531311, + "learning_rate": 1.744390685416255e-05, + "loss": 0.08168792724609375, + "step": 6910 + }, + { + "epoch": 0.467148844125997, + "grad_norm": 0.3182085454463959, + "learning_rate": 1.744065135287098e-05, + "loss": 0.07061004638671875, + "step": 6911 + }, + { + "epoch": 0.46721643909693117, + "grad_norm": 0.5242714285850525, + "learning_rate": 1.743739573348553e-05, + "loss": 0.10369873046875, + "step": 6912 + }, + { + "epoch": 0.46728403406786534, + "grad_norm": 0.28153523802757263, + "learning_rate": 1.7434139996163712e-05, + "loss": 0.0454864501953125, + "step": 6913 + }, + { + "epoch": 0.4673516290387995, + "grad_norm": 0.5195698738098145, + "learning_rate": 1.743088414106306e-05, + "loss": 0.097564697265625, + "step": 6914 + }, + { + "epoch": 0.4674192240097337, + "grad_norm": 0.22096167504787445, + "learning_rate": 1.742762816834112e-05, + "loss": 0.039638519287109375, + "step": 6915 + }, + { + "epoch": 0.46748681898066785, + "grad_norm": 0.632984459400177, + "learning_rate": 1.742437207815543e-05, + "loss": 0.12834930419921875, + "step": 6916 + }, + { + "epoch": 0.467554413951602, + "grad_norm": 0.6220722198486328, + "learning_rate": 1.742111587066354e-05, + "loss": 0.12630844116210938, + "step": 6917 + }, + { + "epoch": 0.46762200892253614, + "grad_norm": 1.135465383529663, + "learning_rate": 1.741785954602301e-05, + "loss": 0.15177536010742188, + "step": 6918 + }, + { + "epoch": 0.4676896038934703, + "grad_norm": 0.9029372334480286, + "learning_rate": 1.7414603104391402e-05, + "loss": 0.123809814453125, + "step": 6919 + }, + { + "epoch": 0.4677571988644045, + "grad_norm": 0.9054577946662903, + "learning_rate": 1.7411346545926273e-05, + "loss": 0.174224853515625, + "step": 6920 + }, + { + "epoch": 0.46782479383533865, + "grad_norm": 0.6736283898353577, + "learning_rate": 1.7408089870785205e-05, + "loss": 0.10136795043945312, + "step": 6921 + }, + { + "epoch": 0.4678923888062728, + "grad_norm": 0.8339223861694336, + "learning_rate": 1.7404833079125773e-05, + "loss": 0.17486572265625, + "step": 6922 + }, + { + "epoch": 0.467959983777207, + "grad_norm": 0.5309959650039673, + "learning_rate": 1.7401576171105562e-05, + "loss": 0.09847450256347656, + "step": 6923 + }, + { + "epoch": 0.46802757874814116, + "grad_norm": 0.21880796551704407, + "learning_rate": 1.739831914688216e-05, + "loss": 0.03759574890136719, + "step": 6924 + }, + { + "epoch": 0.4680951737190753, + "grad_norm": 0.4945213794708252, + "learning_rate": 1.7395062006613164e-05, + "loss": 0.09247970581054688, + "step": 6925 + }, + { + "epoch": 0.46816276869000945, + "grad_norm": 0.4239806532859802, + "learning_rate": 1.7391804750456174e-05, + "loss": 0.074676513671875, + "step": 6926 + }, + { + "epoch": 0.4682303636609436, + "grad_norm": 0.7933980822563171, + "learning_rate": 1.7388547378568792e-05, + "loss": 0.164581298828125, + "step": 6927 + }, + { + "epoch": 0.4682979586318778, + "grad_norm": 0.8730037808418274, + "learning_rate": 1.7385289891108633e-05, + "loss": 0.1475677490234375, + "step": 6928 + }, + { + "epoch": 0.46836555360281196, + "grad_norm": 0.5355574488639832, + "learning_rate": 1.738203228823332e-05, + "loss": 0.08357620239257812, + "step": 6929 + }, + { + "epoch": 0.4684331485737461, + "grad_norm": 1.1485052108764648, + "learning_rate": 1.7378774570100468e-05, + "loss": 0.26031494140625, + "step": 6930 + }, + { + "epoch": 0.4685007435446803, + "grad_norm": 0.5896688103675842, + "learning_rate": 1.737551673686771e-05, + "loss": 0.10021209716796875, + "step": 6931 + }, + { + "epoch": 0.4685683385156144, + "grad_norm": 0.8441448211669922, + "learning_rate": 1.7372258788692674e-05, + "loss": 0.134063720703125, + "step": 6932 + }, + { + "epoch": 0.4686359334865486, + "grad_norm": 0.6468759179115295, + "learning_rate": 1.7369000725733014e-05, + "loss": 0.09552001953125, + "step": 6933 + }, + { + "epoch": 0.46870352845748275, + "grad_norm": 0.845518171787262, + "learning_rate": 1.736574254814636e-05, + "loss": 0.1243438720703125, + "step": 6934 + }, + { + "epoch": 0.4687711234284169, + "grad_norm": 0.2768818140029907, + "learning_rate": 1.736248425609037e-05, + "loss": 0.04192352294921875, + "step": 6935 + }, + { + "epoch": 0.4688387183993511, + "grad_norm": 0.618951678276062, + "learning_rate": 1.7359225849722704e-05, + "loss": 0.09206771850585938, + "step": 6936 + }, + { + "epoch": 0.46890631337028527, + "grad_norm": 0.6811530590057373, + "learning_rate": 1.7355967329201016e-05, + "loss": 0.1155548095703125, + "step": 6937 + }, + { + "epoch": 0.46897390834121944, + "grad_norm": 0.3315313160419464, + "learning_rate": 1.7352708694682975e-05, + "loss": 0.03851318359375, + "step": 6938 + }, + { + "epoch": 0.46904150331215355, + "grad_norm": 0.4037865996360779, + "learning_rate": 1.734944994632626e-05, + "loss": 0.09168243408203125, + "step": 6939 + }, + { + "epoch": 0.4691090982830877, + "grad_norm": 0.3557076156139374, + "learning_rate": 1.7346191084288542e-05, + "loss": 0.07898712158203125, + "step": 6940 + }, + { + "epoch": 0.4691766932540219, + "grad_norm": 0.7224873304367065, + "learning_rate": 1.7342932108727508e-05, + "loss": 0.12155914306640625, + "step": 6941 + }, + { + "epoch": 0.46924428822495606, + "grad_norm": 0.5779749751091003, + "learning_rate": 1.733967301980085e-05, + "loss": 0.12041091918945312, + "step": 6942 + }, + { + "epoch": 0.46931188319589023, + "grad_norm": 0.3589193820953369, + "learning_rate": 1.7336413817666262e-05, + "loss": 0.07524871826171875, + "step": 6943 + }, + { + "epoch": 0.4693794781668244, + "grad_norm": 0.4038112461566925, + "learning_rate": 1.7333154502481445e-05, + "loss": 0.0318145751953125, + "step": 6944 + }, + { + "epoch": 0.4694470731377586, + "grad_norm": 0.4850989282131195, + "learning_rate": 1.7329895074404097e-05, + "loss": 0.13393402099609375, + "step": 6945 + }, + { + "epoch": 0.4695146681086927, + "grad_norm": 0.7196997404098511, + "learning_rate": 1.732663553359194e-05, + "loss": 0.117462158203125, + "step": 6946 + }, + { + "epoch": 0.46958226307962686, + "grad_norm": 0.6501538753509521, + "learning_rate": 1.7323375880202692e-05, + "loss": 0.1504364013671875, + "step": 6947 + }, + { + "epoch": 0.46964985805056103, + "grad_norm": 0.6047301888465881, + "learning_rate": 1.732011611439406e-05, + "loss": 0.10599517822265625, + "step": 6948 + }, + { + "epoch": 0.4697174530214952, + "grad_norm": 0.8892297148704529, + "learning_rate": 1.7316856236323788e-05, + "loss": 0.21441650390625, + "step": 6949 + }, + { + "epoch": 0.46978504799242937, + "grad_norm": 0.6627318859100342, + "learning_rate": 1.7313596246149603e-05, + "loss": 0.0774383544921875, + "step": 6950 + }, + { + "epoch": 0.46985264296336354, + "grad_norm": 0.592829704284668, + "learning_rate": 1.731033614402924e-05, + "loss": 0.1212921142578125, + "step": 6951 + }, + { + "epoch": 0.4699202379342977, + "grad_norm": 0.7824421525001526, + "learning_rate": 1.730707593012045e-05, + "loss": 0.1339874267578125, + "step": 6952 + }, + { + "epoch": 0.46998783290523183, + "grad_norm": 0.39709359407424927, + "learning_rate": 1.7303815604580974e-05, + "loss": 0.09588623046875, + "step": 6953 + }, + { + "epoch": 0.470055427876166, + "grad_norm": 0.28597864508628845, + "learning_rate": 1.7300555167568577e-05, + "loss": 0.043331146240234375, + "step": 6954 + }, + { + "epoch": 0.47012302284710017, + "grad_norm": 0.5425344109535217, + "learning_rate": 1.7297294619241012e-05, + "loss": 0.08158493041992188, + "step": 6955 + }, + { + "epoch": 0.47019061781803434, + "grad_norm": 0.7973588109016418, + "learning_rate": 1.7294033959756045e-05, + "loss": 0.15458297729492188, + "step": 6956 + }, + { + "epoch": 0.4702582127889685, + "grad_norm": 0.9490606784820557, + "learning_rate": 1.729077318927145e-05, + "loss": 0.170654296875, + "step": 6957 + }, + { + "epoch": 0.4703258077599027, + "grad_norm": 0.9299719333648682, + "learning_rate": 1.7287512307945e-05, + "loss": 0.14736175537109375, + "step": 6958 + }, + { + "epoch": 0.47039340273083685, + "grad_norm": 0.5318024754524231, + "learning_rate": 1.7284251315934486e-05, + "loss": 0.09142303466796875, + "step": 6959 + }, + { + "epoch": 0.47046099770177097, + "grad_norm": 0.7425425052642822, + "learning_rate": 1.7280990213397685e-05, + "loss": 0.14548492431640625, + "step": 6960 + }, + { + "epoch": 0.47052859267270514, + "grad_norm": 0.19008727371692657, + "learning_rate": 1.7277729000492385e-05, + "loss": 0.032314300537109375, + "step": 6961 + }, + { + "epoch": 0.4705961876436393, + "grad_norm": 0.7023587822914124, + "learning_rate": 1.7274467677376397e-05, + "loss": 0.12486648559570312, + "step": 6962 + }, + { + "epoch": 0.4706637826145735, + "grad_norm": 0.5912395715713501, + "learning_rate": 1.7271206244207515e-05, + "loss": 0.0932159423828125, + "step": 6963 + }, + { + "epoch": 0.47073137758550765, + "grad_norm": 0.33864444494247437, + "learning_rate": 1.7267944701143555e-05, + "loss": 0.07318115234375, + "step": 6964 + }, + { + "epoch": 0.4707989725564418, + "grad_norm": 0.5645292401313782, + "learning_rate": 1.7264683048342323e-05, + "loss": 0.10820388793945312, + "step": 6965 + }, + { + "epoch": 0.470866567527376, + "grad_norm": 1.0586552619934082, + "learning_rate": 1.7261421285961638e-05, + "loss": 0.203399658203125, + "step": 6966 + }, + { + "epoch": 0.4709341624983101, + "grad_norm": 0.2923881709575653, + "learning_rate": 1.7258159414159333e-05, + "loss": 0.0602264404296875, + "step": 6967 + }, + { + "epoch": 0.4710017574692443, + "grad_norm": 0.6051293611526489, + "learning_rate": 1.7254897433093226e-05, + "loss": 0.10418701171875, + "step": 6968 + }, + { + "epoch": 0.47106935244017845, + "grad_norm": 0.28810855746269226, + "learning_rate": 1.7251635342921165e-05, + "loss": 0.064453125, + "step": 6969 + }, + { + "epoch": 0.4711369474111126, + "grad_norm": 0.597865104675293, + "learning_rate": 1.724837314380098e-05, + "loss": 0.095306396484375, + "step": 6970 + }, + { + "epoch": 0.4712045423820468, + "grad_norm": 0.24555975198745728, + "learning_rate": 1.7245110835890523e-05, + "loss": 0.036594390869140625, + "step": 6971 + }, + { + "epoch": 0.47127213735298096, + "grad_norm": 0.40784144401550293, + "learning_rate": 1.7241848419347638e-05, + "loss": 0.050750732421875, + "step": 6972 + }, + { + "epoch": 0.47133973232391513, + "grad_norm": 1.0099562406539917, + "learning_rate": 1.7238585894330186e-05, + "loss": 0.15069580078125, + "step": 6973 + }, + { + "epoch": 0.47140732729484924, + "grad_norm": 0.7921363115310669, + "learning_rate": 1.7235323260996025e-05, + "loss": 0.1457061767578125, + "step": 6974 + }, + { + "epoch": 0.4714749222657834, + "grad_norm": 0.8615878820419312, + "learning_rate": 1.7232060519503024e-05, + "loss": 0.11880874633789062, + "step": 6975 + }, + { + "epoch": 0.4715425172367176, + "grad_norm": 0.624997615814209, + "learning_rate": 1.7228797670009057e-05, + "loss": 0.08924102783203125, + "step": 6976 + }, + { + "epoch": 0.47161011220765175, + "grad_norm": 0.19616425037384033, + "learning_rate": 1.7225534712671996e-05, + "loss": 0.024522781372070312, + "step": 6977 + }, + { + "epoch": 0.4716777071785859, + "grad_norm": 0.2665293216705322, + "learning_rate": 1.722227164764972e-05, + "loss": 0.052764892578125, + "step": 6978 + }, + { + "epoch": 0.4717453021495201, + "grad_norm": 0.14932511746883392, + "learning_rate": 1.7219008475100127e-05, + "loss": 0.021884918212890625, + "step": 6979 + }, + { + "epoch": 0.4718128971204542, + "grad_norm": 1.058045506477356, + "learning_rate": 1.7215745195181106e-05, + "loss": 0.184478759765625, + "step": 6980 + }, + { + "epoch": 0.4718804920913884, + "grad_norm": 0.19603194296360016, + "learning_rate": 1.721248180805055e-05, + "loss": 0.04369354248046875, + "step": 6981 + }, + { + "epoch": 0.47194808706232255, + "grad_norm": 1.2271934747695923, + "learning_rate": 1.7209218313866365e-05, + "loss": 0.25469970703125, + "step": 6982 + }, + { + "epoch": 0.4720156820332567, + "grad_norm": 0.701124370098114, + "learning_rate": 1.7205954712786457e-05, + "loss": 0.129913330078125, + "step": 6983 + }, + { + "epoch": 0.4720832770041909, + "grad_norm": 0.5152977705001831, + "learning_rate": 1.7202691004968745e-05, + "loss": 0.08880615234375, + "step": 6984 + }, + { + "epoch": 0.47215087197512506, + "grad_norm": 0.3967287540435791, + "learning_rate": 1.7199427190571145e-05, + "loss": 0.076263427734375, + "step": 6985 + }, + { + "epoch": 0.47221846694605923, + "grad_norm": 0.6930866837501526, + "learning_rate": 1.7196163269751573e-05, + "loss": 0.150299072265625, + "step": 6986 + }, + { + "epoch": 0.47228606191699335, + "grad_norm": 0.27795884013175964, + "learning_rate": 1.7192899242667967e-05, + "loss": 0.05277061462402344, + "step": 6987 + }, + { + "epoch": 0.4723536568879275, + "grad_norm": 0.9734449982643127, + "learning_rate": 1.718963510947826e-05, + "loss": 0.1643524169921875, + "step": 6988 + }, + { + "epoch": 0.4724212518588617, + "grad_norm": 0.757146418094635, + "learning_rate": 1.718637087034039e-05, + "loss": 0.14106369018554688, + "step": 6989 + }, + { + "epoch": 0.47248884682979586, + "grad_norm": 0.4303414523601532, + "learning_rate": 1.71831065254123e-05, + "loss": 0.0455474853515625, + "step": 6990 + }, + { + "epoch": 0.47255644180073003, + "grad_norm": 1.092693567276001, + "learning_rate": 1.7179842074851935e-05, + "loss": 0.20635986328125, + "step": 6991 + }, + { + "epoch": 0.4726240367716642, + "grad_norm": 1.4457272291183472, + "learning_rate": 1.7176577518817256e-05, + "loss": 0.2000274658203125, + "step": 6992 + }, + { + "epoch": 0.4726916317425984, + "grad_norm": 0.22850622236728668, + "learning_rate": 1.7173312857466225e-05, + "loss": 0.03536224365234375, + "step": 6993 + }, + { + "epoch": 0.4727592267135325, + "grad_norm": 1.0818545818328857, + "learning_rate": 1.7170048090956798e-05, + "loss": 0.1925048828125, + "step": 6994 + }, + { + "epoch": 0.47282682168446666, + "grad_norm": 0.3064119517803192, + "learning_rate": 1.7166783219446955e-05, + "loss": 0.06953811645507812, + "step": 6995 + }, + { + "epoch": 0.47289441665540083, + "grad_norm": 0.6361005902290344, + "learning_rate": 1.7163518243094656e-05, + "loss": 0.1039276123046875, + "step": 6996 + }, + { + "epoch": 0.472962011626335, + "grad_norm": 0.5123898386955261, + "learning_rate": 1.7160253162057895e-05, + "loss": 0.08779144287109375, + "step": 6997 + }, + { + "epoch": 0.47302960659726917, + "grad_norm": 0.7590909600257874, + "learning_rate": 1.7156987976494652e-05, + "loss": 0.139678955078125, + "step": 6998 + }, + { + "epoch": 0.47309720156820334, + "grad_norm": 0.6710449457168579, + "learning_rate": 1.7153722686562913e-05, + "loss": 0.14715576171875, + "step": 6999 + }, + { + "epoch": 0.4731647965391375, + "grad_norm": 0.980815052986145, + "learning_rate": 1.7150457292420676e-05, + "loss": 0.185791015625, + "step": 7000 + }, + { + "epoch": 0.4732323915100716, + "grad_norm": 0.5289198160171509, + "learning_rate": 1.7147191794225945e-05, + "loss": 0.10846710205078125, + "step": 7001 + }, + { + "epoch": 0.4732999864810058, + "grad_norm": 0.5579239726066589, + "learning_rate": 1.7143926192136723e-05, + "loss": 0.132354736328125, + "step": 7002 + }, + { + "epoch": 0.47336758145193997, + "grad_norm": 0.8756968379020691, + "learning_rate": 1.7140660486311014e-05, + "loss": 0.148895263671875, + "step": 7003 + }, + { + "epoch": 0.47343517642287414, + "grad_norm": 0.36520910263061523, + "learning_rate": 1.713739467690684e-05, + "loss": 0.06476593017578125, + "step": 7004 + }, + { + "epoch": 0.4735027713938083, + "grad_norm": 0.3845710754394531, + "learning_rate": 1.713412876408222e-05, + "loss": 0.078704833984375, + "step": 7005 + }, + { + "epoch": 0.4735703663647425, + "grad_norm": 1.0133439302444458, + "learning_rate": 1.7130862747995183e-05, + "loss": 0.16168212890625, + "step": 7006 + }, + { + "epoch": 0.47363796133567665, + "grad_norm": 0.30717042088508606, + "learning_rate": 1.712759662880375e-05, + "loss": 0.06431961059570312, + "step": 7007 + }, + { + "epoch": 0.47370555630661076, + "grad_norm": 0.7332025170326233, + "learning_rate": 1.712433040666596e-05, + "loss": 0.15716552734375, + "step": 7008 + }, + { + "epoch": 0.47377315127754493, + "grad_norm": 0.8897356390953064, + "learning_rate": 1.7121064081739854e-05, + "loss": 0.17218017578125, + "step": 7009 + }, + { + "epoch": 0.4738407462484791, + "grad_norm": 0.2067202478647232, + "learning_rate": 1.7117797654183477e-05, + "loss": 0.035312652587890625, + "step": 7010 + }, + { + "epoch": 0.4739083412194133, + "grad_norm": 0.32870545983314514, + "learning_rate": 1.711453112415488e-05, + "loss": 0.064453125, + "step": 7011 + }, + { + "epoch": 0.47397593619034745, + "grad_norm": 1.077500820159912, + "learning_rate": 1.711126449181211e-05, + "loss": 0.221649169921875, + "step": 7012 + }, + { + "epoch": 0.4740435311612816, + "grad_norm": 0.48306596279144287, + "learning_rate": 1.7107997757313244e-05, + "loss": 0.13402557373046875, + "step": 7013 + }, + { + "epoch": 0.4741111261322158, + "grad_norm": 0.646181583404541, + "learning_rate": 1.710473092081634e-05, + "loss": 0.13153076171875, + "step": 7014 + }, + { + "epoch": 0.4741787211031499, + "grad_norm": 1.2029023170471191, + "learning_rate": 1.7101463982479456e-05, + "loss": 0.13481903076171875, + "step": 7015 + }, + { + "epoch": 0.4742463160740841, + "grad_norm": 0.5272010564804077, + "learning_rate": 1.7098196942460676e-05, + "loss": 0.08549690246582031, + "step": 7016 + }, + { + "epoch": 0.47431391104501824, + "grad_norm": 0.671617865562439, + "learning_rate": 1.7094929800918082e-05, + "loss": 0.167572021484375, + "step": 7017 + }, + { + "epoch": 0.4743815060159524, + "grad_norm": 0.6527005434036255, + "learning_rate": 1.709166255800976e-05, + "loss": 0.10143280029296875, + "step": 7018 + }, + { + "epoch": 0.4744491009868866, + "grad_norm": 1.0886995792388916, + "learning_rate": 1.7088395213893787e-05, + "loss": 0.176849365234375, + "step": 7019 + }, + { + "epoch": 0.47451669595782076, + "grad_norm": 0.8291929364204407, + "learning_rate": 1.7085127768728272e-05, + "loss": 0.16021728515625, + "step": 7020 + }, + { + "epoch": 0.4745842909287549, + "grad_norm": 1.2250126600265503, + "learning_rate": 1.7081860222671305e-05, + "loss": 0.13581085205078125, + "step": 7021 + }, + { + "epoch": 0.47465188589968904, + "grad_norm": 0.4937329590320587, + "learning_rate": 1.7078592575880995e-05, + "loss": 0.112030029296875, + "step": 7022 + }, + { + "epoch": 0.4747194808706232, + "grad_norm": 1.5786608457565308, + "learning_rate": 1.707532482851545e-05, + "loss": 0.235931396484375, + "step": 7023 + }, + { + "epoch": 0.4747870758415574, + "grad_norm": 0.6114101409912109, + "learning_rate": 1.707205698073278e-05, + "loss": 0.1407623291015625, + "step": 7024 + }, + { + "epoch": 0.47485467081249155, + "grad_norm": 0.5068272352218628, + "learning_rate": 1.706878903269111e-05, + "loss": 0.1474151611328125, + "step": 7025 + }, + { + "epoch": 0.4749222657834257, + "grad_norm": 0.573698103427887, + "learning_rate": 1.7065520984548562e-05, + "loss": 0.10425758361816406, + "step": 7026 + }, + { + "epoch": 0.4749898607543599, + "grad_norm": 0.3396477699279785, + "learning_rate": 1.7062252836463263e-05, + "loss": 0.067962646484375, + "step": 7027 + }, + { + "epoch": 0.47505745572529406, + "grad_norm": 1.1217862367630005, + "learning_rate": 1.705898458859335e-05, + "loss": 0.19427490234375, + "step": 7028 + }, + { + "epoch": 0.4751250506962282, + "grad_norm": 0.32858970761299133, + "learning_rate": 1.705571624109695e-05, + "loss": 0.065765380859375, + "step": 7029 + }, + { + "epoch": 0.47519264566716235, + "grad_norm": 0.21447259187698364, + "learning_rate": 1.705244779413222e-05, + "loss": 0.03218841552734375, + "step": 7030 + }, + { + "epoch": 0.4752602406380965, + "grad_norm": 0.6665438413619995, + "learning_rate": 1.7049179247857302e-05, + "loss": 0.1679534912109375, + "step": 7031 + }, + { + "epoch": 0.4753278356090307, + "grad_norm": 1.1842485666275024, + "learning_rate": 1.704591060243034e-05, + "loss": 0.12837982177734375, + "step": 7032 + }, + { + "epoch": 0.47539543057996486, + "grad_norm": 0.26598286628723145, + "learning_rate": 1.704264185800951e-05, + "loss": 0.06514739990234375, + "step": 7033 + }, + { + "epoch": 0.47546302555089903, + "grad_norm": 0.4844872057437897, + "learning_rate": 1.7039373014752957e-05, + "loss": 0.0944671630859375, + "step": 7034 + }, + { + "epoch": 0.4755306205218332, + "grad_norm": 0.3249261975288391, + "learning_rate": 1.703610407281886e-05, + "loss": 0.069244384765625, + "step": 7035 + }, + { + "epoch": 0.4755982154927673, + "grad_norm": 0.8127715587615967, + "learning_rate": 1.7032835032365383e-05, + "loss": 0.14687347412109375, + "step": 7036 + }, + { + "epoch": 0.4756658104637015, + "grad_norm": 0.2685084640979767, + "learning_rate": 1.7029565893550704e-05, + "loss": 0.057933807373046875, + "step": 7037 + }, + { + "epoch": 0.47573340543463566, + "grad_norm": 0.8088045716285706, + "learning_rate": 1.7026296656533007e-05, + "loss": 0.1669921875, + "step": 7038 + }, + { + "epoch": 0.47580100040556983, + "grad_norm": 0.3411334455013275, + "learning_rate": 1.7023027321470474e-05, + "loss": 0.04508399963378906, + "step": 7039 + }, + { + "epoch": 0.475868595376504, + "grad_norm": 0.3601999282836914, + "learning_rate": 1.7019757888521304e-05, + "loss": 0.0743255615234375, + "step": 7040 + }, + { + "epoch": 0.47593619034743817, + "grad_norm": 0.19870945811271667, + "learning_rate": 1.701648835784368e-05, + "loss": 0.032825469970703125, + "step": 7041 + }, + { + "epoch": 0.4760037853183723, + "grad_norm": 0.2219407856464386, + "learning_rate": 1.701321872959581e-05, + "loss": 0.04361724853515625, + "step": 7042 + }, + { + "epoch": 0.47607138028930646, + "grad_norm": 0.2567305266857147, + "learning_rate": 1.70099490039359e-05, + "loss": 0.0459136962890625, + "step": 7043 + }, + { + "epoch": 0.4761389752602406, + "grad_norm": 1.1821696758270264, + "learning_rate": 1.700667918102216e-05, + "loss": 0.215972900390625, + "step": 7044 + }, + { + "epoch": 0.4762065702311748, + "grad_norm": 0.41355767846107483, + "learning_rate": 1.700340926101279e-05, + "loss": 0.0866241455078125, + "step": 7045 + }, + { + "epoch": 0.47627416520210897, + "grad_norm": 0.36861059069633484, + "learning_rate": 1.7000139244066035e-05, + "loss": 0.04443168640136719, + "step": 7046 + }, + { + "epoch": 0.47634176017304314, + "grad_norm": 0.28299105167388916, + "learning_rate": 1.6996869130340097e-05, + "loss": 0.05866241455078125, + "step": 7047 + }, + { + "epoch": 0.4764093551439773, + "grad_norm": 0.21426410973072052, + "learning_rate": 1.6993598919993215e-05, + "loss": 0.03451347351074219, + "step": 7048 + }, + { + "epoch": 0.4764769501149114, + "grad_norm": 0.8699514269828796, + "learning_rate": 1.699032861318362e-05, + "loss": 0.166015625, + "step": 7049 + }, + { + "epoch": 0.4765445450858456, + "grad_norm": 0.23911616206169128, + "learning_rate": 1.698705821006954e-05, + "loss": 0.045124053955078125, + "step": 7050 + }, + { + "epoch": 0.47661214005677977, + "grad_norm": 0.4622245132923126, + "learning_rate": 1.698378771080924e-05, + "loss": 0.08790969848632812, + "step": 7051 + }, + { + "epoch": 0.47667973502771394, + "grad_norm": 0.8355687260627747, + "learning_rate": 1.6980517115560943e-05, + "loss": 0.14111709594726562, + "step": 7052 + }, + { + "epoch": 0.4767473299986481, + "grad_norm": 0.26722651720046997, + "learning_rate": 1.6977246424482918e-05, + "loss": 0.047321319580078125, + "step": 7053 + }, + { + "epoch": 0.4768149249695823, + "grad_norm": 0.5751071572303772, + "learning_rate": 1.6973975637733412e-05, + "loss": 0.10820770263671875, + "step": 7054 + }, + { + "epoch": 0.47688251994051645, + "grad_norm": 0.3517884612083435, + "learning_rate": 1.697070475547069e-05, + "loss": 0.044345855712890625, + "step": 7055 + }, + { + "epoch": 0.47695011491145056, + "grad_norm": 1.1143176555633545, + "learning_rate": 1.6967433777853015e-05, + "loss": 0.1380767822265625, + "step": 7056 + }, + { + "epoch": 0.47701770988238473, + "grad_norm": 1.0626699924468994, + "learning_rate": 1.696416270503866e-05, + "loss": 0.146728515625, + "step": 7057 + }, + { + "epoch": 0.4770853048533189, + "grad_norm": 0.32766199111938477, + "learning_rate": 1.6960891537185894e-05, + "loss": 0.060916900634765625, + "step": 7058 + }, + { + "epoch": 0.4771528998242531, + "grad_norm": 0.9922749996185303, + "learning_rate": 1.6957620274453003e-05, + "loss": 0.102783203125, + "step": 7059 + }, + { + "epoch": 0.47722049479518724, + "grad_norm": 0.4467424154281616, + "learning_rate": 1.6954348916998268e-05, + "loss": 0.0878753662109375, + "step": 7060 + }, + { + "epoch": 0.4772880897661214, + "grad_norm": 0.3481031060218811, + "learning_rate": 1.695107746497998e-05, + "loss": 0.0719146728515625, + "step": 7061 + }, + { + "epoch": 0.4773556847370556, + "grad_norm": 0.5250099897384644, + "learning_rate": 1.694780591855643e-05, + "loss": 0.1175079345703125, + "step": 7062 + }, + { + "epoch": 0.4774232797079897, + "grad_norm": 0.4588457942008972, + "learning_rate": 1.6944534277885916e-05, + "loss": 0.05731964111328125, + "step": 7063 + }, + { + "epoch": 0.47749087467892387, + "grad_norm": 0.754830002784729, + "learning_rate": 1.6941262543126744e-05, + "loss": 0.137908935546875, + "step": 7064 + }, + { + "epoch": 0.47755846964985804, + "grad_norm": 0.5636481642723083, + "learning_rate": 1.6937990714437216e-05, + "loss": 0.1032562255859375, + "step": 7065 + }, + { + "epoch": 0.4776260646207922, + "grad_norm": 0.5983796119689941, + "learning_rate": 1.693471879197564e-05, + "loss": 0.088134765625, + "step": 7066 + }, + { + "epoch": 0.4776936595917264, + "grad_norm": 0.9980268478393555, + "learning_rate": 1.6931446775900345e-05, + "loss": 0.140899658203125, + "step": 7067 + }, + { + "epoch": 0.47776125456266055, + "grad_norm": 1.8495755195617676, + "learning_rate": 1.692817466636964e-05, + "loss": 0.249603271484375, + "step": 7068 + }, + { + "epoch": 0.4778288495335947, + "grad_norm": 0.509811282157898, + "learning_rate": 1.692490246354185e-05, + "loss": 0.07010650634765625, + "step": 7069 + }, + { + "epoch": 0.47789644450452884, + "grad_norm": 0.49125421047210693, + "learning_rate": 1.692163016757531e-05, + "loss": 0.0643463134765625, + "step": 7070 + }, + { + "epoch": 0.477964039475463, + "grad_norm": 1.2652943134307861, + "learning_rate": 1.691835777862835e-05, + "loss": 0.1751556396484375, + "step": 7071 + }, + { + "epoch": 0.4780316344463972, + "grad_norm": 0.956493079662323, + "learning_rate": 1.6915085296859314e-05, + "loss": 0.16094970703125, + "step": 7072 + }, + { + "epoch": 0.47809922941733135, + "grad_norm": 0.4716120660305023, + "learning_rate": 1.6911812722426543e-05, + "loss": 0.08824539184570312, + "step": 7073 + }, + { + "epoch": 0.4781668243882655, + "grad_norm": 0.7171378135681152, + "learning_rate": 1.690854005548838e-05, + "loss": 0.164886474609375, + "step": 7074 + }, + { + "epoch": 0.4782344193591997, + "grad_norm": 1.1438313722610474, + "learning_rate": 1.6905267296203182e-05, + "loss": 0.20819091796875, + "step": 7075 + }, + { + "epoch": 0.47830201433013386, + "grad_norm": 0.22265176475048065, + "learning_rate": 1.6901994444729305e-05, + "loss": 0.043476104736328125, + "step": 7076 + }, + { + "epoch": 0.478369609301068, + "grad_norm": 0.2416522204875946, + "learning_rate": 1.6898721501225105e-05, + "loss": 0.045978546142578125, + "step": 7077 + }, + { + "epoch": 0.47843720427200215, + "grad_norm": 0.643521249294281, + "learning_rate": 1.689544846584895e-05, + "loss": 0.08318328857421875, + "step": 7078 + }, + { + "epoch": 0.4785047992429363, + "grad_norm": 1.1976988315582275, + "learning_rate": 1.689217533875921e-05, + "loss": 0.1641387939453125, + "step": 7079 + }, + { + "epoch": 0.4785723942138705, + "grad_norm": 0.9414463043212891, + "learning_rate": 1.6888902120114265e-05, + "loss": 0.14548492431640625, + "step": 7080 + }, + { + "epoch": 0.47863998918480466, + "grad_norm": 0.2749112546443939, + "learning_rate": 1.6885628810072485e-05, + "loss": 0.0484619140625, + "step": 7081 + }, + { + "epoch": 0.47870758415573883, + "grad_norm": 0.8649529218673706, + "learning_rate": 1.6882355408792256e-05, + "loss": 0.1451873779296875, + "step": 7082 + }, + { + "epoch": 0.478775179126673, + "grad_norm": 0.6048685908317566, + "learning_rate": 1.6879081916431963e-05, + "loss": 0.1197967529296875, + "step": 7083 + }, + { + "epoch": 0.4788427740976071, + "grad_norm": 0.32222163677215576, + "learning_rate": 1.687580833315001e-05, + "loss": 0.048069000244140625, + "step": 7084 + }, + { + "epoch": 0.4789103690685413, + "grad_norm": 0.7840332984924316, + "learning_rate": 1.6872534659104777e-05, + "loss": 0.171875, + "step": 7085 + }, + { + "epoch": 0.47897796403947546, + "grad_norm": 0.7708238959312439, + "learning_rate": 1.6869260894454678e-05, + "loss": 0.15142822265625, + "step": 7086 + }, + { + "epoch": 0.4790455590104096, + "grad_norm": 0.727470874786377, + "learning_rate": 1.6865987039358106e-05, + "loss": 0.1348876953125, + "step": 7087 + }, + { + "epoch": 0.4791131539813438, + "grad_norm": 1.2207551002502441, + "learning_rate": 1.6862713093973483e-05, + "loss": 0.18438720703125, + "step": 7088 + }, + { + "epoch": 0.47918074895227797, + "grad_norm": 0.23920004069805145, + "learning_rate": 1.6859439058459212e-05, + "loss": 0.031490325927734375, + "step": 7089 + }, + { + "epoch": 0.47924834392321214, + "grad_norm": 0.8407115936279297, + "learning_rate": 1.6856164932973714e-05, + "loss": 0.1240386962890625, + "step": 7090 + }, + { + "epoch": 0.47931593889414625, + "grad_norm": 0.5891973376274109, + "learning_rate": 1.6852890717675412e-05, + "loss": 0.1396331787109375, + "step": 7091 + }, + { + "epoch": 0.4793835338650804, + "grad_norm": 0.8456743955612183, + "learning_rate": 1.6849616412722742e-05, + "loss": 0.156158447265625, + "step": 7092 + }, + { + "epoch": 0.4794511288360146, + "grad_norm": 0.2993752658367157, + "learning_rate": 1.6846342018274122e-05, + "loss": 0.0372314453125, + "step": 7093 + }, + { + "epoch": 0.47951872380694877, + "grad_norm": 0.9798411726951599, + "learning_rate": 1.6843067534487993e-05, + "loss": 0.1454925537109375, + "step": 7094 + }, + { + "epoch": 0.47958631877788294, + "grad_norm": 1.0061579942703247, + "learning_rate": 1.6839792961522795e-05, + "loss": 0.1445770263671875, + "step": 7095 + }, + { + "epoch": 0.4796539137488171, + "grad_norm": 1.3627829551696777, + "learning_rate": 1.683651829953697e-05, + "loss": 0.12175750732421875, + "step": 7096 + }, + { + "epoch": 0.4797215087197513, + "grad_norm": 0.5963348746299744, + "learning_rate": 1.683324354868897e-05, + "loss": 0.10266876220703125, + "step": 7097 + }, + { + "epoch": 0.4797891036906854, + "grad_norm": 1.0910136699676514, + "learning_rate": 1.682996870913725e-05, + "loss": 0.119354248046875, + "step": 7098 + }, + { + "epoch": 0.47985669866161956, + "grad_norm": 0.5081384181976318, + "learning_rate": 1.682669378104026e-05, + "loss": 0.0761260986328125, + "step": 7099 + }, + { + "epoch": 0.47992429363255373, + "grad_norm": 0.8067759275436401, + "learning_rate": 1.6823418764556466e-05, + "loss": 0.10396099090576172, + "step": 7100 + }, + { + "epoch": 0.4799918886034879, + "grad_norm": 0.5113754868507385, + "learning_rate": 1.6820143659844334e-05, + "loss": 0.1092987060546875, + "step": 7101 + }, + { + "epoch": 0.4800594835744221, + "grad_norm": 0.8351048827171326, + "learning_rate": 1.681686846706233e-05, + "loss": 0.2117156982421875, + "step": 7102 + }, + { + "epoch": 0.48012707854535625, + "grad_norm": 0.9327250719070435, + "learning_rate": 1.6813593186368928e-05, + "loss": 0.154998779296875, + "step": 7103 + }, + { + "epoch": 0.4801946735162904, + "grad_norm": 0.352709025144577, + "learning_rate": 1.681031781792261e-05, + "loss": 0.05326080322265625, + "step": 7104 + }, + { + "epoch": 0.48026226848722453, + "grad_norm": 1.3057098388671875, + "learning_rate": 1.680704236188186e-05, + "loss": 0.2409820556640625, + "step": 7105 + }, + { + "epoch": 0.4803298634581587, + "grad_norm": 1.1411441564559937, + "learning_rate": 1.6803766818405162e-05, + "loss": 0.16278076171875, + "step": 7106 + }, + { + "epoch": 0.48039745842909287, + "grad_norm": 0.2973930239677429, + "learning_rate": 1.6800491187651006e-05, + "loss": 0.06982421875, + "step": 7107 + }, + { + "epoch": 0.48046505340002704, + "grad_norm": 0.6991247534751892, + "learning_rate": 1.6797215469777893e-05, + "loss": 0.09978485107421875, + "step": 7108 + }, + { + "epoch": 0.4805326483709612, + "grad_norm": 1.1823426485061646, + "learning_rate": 1.6793939664944318e-05, + "loss": 0.15502166748046875, + "step": 7109 + }, + { + "epoch": 0.4806002433418954, + "grad_norm": 0.6142722964286804, + "learning_rate": 1.6790663773308786e-05, + "loss": 0.11374664306640625, + "step": 7110 + }, + { + "epoch": 0.4806678383128295, + "grad_norm": 0.817129909992218, + "learning_rate": 1.6787387795029806e-05, + "loss": 0.11469650268554688, + "step": 7111 + }, + { + "epoch": 0.48073543328376367, + "grad_norm": 1.0202692747116089, + "learning_rate": 1.6784111730265882e-05, + "loss": 0.15950775146484375, + "step": 7112 + }, + { + "epoch": 0.48080302825469784, + "grad_norm": 2.036755084991455, + "learning_rate": 1.678083557917554e-05, + "loss": 0.1847991943359375, + "step": 7113 + }, + { + "epoch": 0.480870623225632, + "grad_norm": 0.2548389434814453, + "learning_rate": 1.6777559341917302e-05, + "loss": 0.04843902587890625, + "step": 7114 + }, + { + "epoch": 0.4809382181965662, + "grad_norm": 0.44516521692276, + "learning_rate": 1.6774283018649683e-05, + "loss": 0.1196746826171875, + "step": 7115 + }, + { + "epoch": 0.48100581316750035, + "grad_norm": 0.1347535103559494, + "learning_rate": 1.677100660953122e-05, + "loss": 0.025501251220703125, + "step": 7116 + }, + { + "epoch": 0.4810734081384345, + "grad_norm": 0.6415365934371948, + "learning_rate": 1.6767730114720446e-05, + "loss": 0.13616180419921875, + "step": 7117 + }, + { + "epoch": 0.48114100310936864, + "grad_norm": 0.2484583705663681, + "learning_rate": 1.676445353437589e-05, + "loss": 0.050991058349609375, + "step": 7118 + }, + { + "epoch": 0.4812085980803028, + "grad_norm": 0.9110817909240723, + "learning_rate": 1.6761176868656105e-05, + "loss": 0.21068954467773438, + "step": 7119 + }, + { + "epoch": 0.481276193051237, + "grad_norm": 0.9608064293861389, + "learning_rate": 1.6757900117719627e-05, + "loss": 0.177978515625, + "step": 7120 + }, + { + "epoch": 0.48134378802217115, + "grad_norm": 1.2428216934204102, + "learning_rate": 1.6754623281725005e-05, + "loss": 0.234832763671875, + "step": 7121 + }, + { + "epoch": 0.4814113829931053, + "grad_norm": 0.7517881393432617, + "learning_rate": 1.6751346360830803e-05, + "loss": 0.14874649047851562, + "step": 7122 + }, + { + "epoch": 0.4814789779640395, + "grad_norm": 0.8199494481086731, + "learning_rate": 1.6748069355195574e-05, + "loss": 0.16008758544921875, + "step": 7123 + }, + { + "epoch": 0.48154657293497366, + "grad_norm": 0.35545849800109863, + "learning_rate": 1.6744792264977873e-05, + "loss": 0.0902862548828125, + "step": 7124 + }, + { + "epoch": 0.4816141679059078, + "grad_norm": 0.3554932475090027, + "learning_rate": 1.6741515090336274e-05, + "loss": 0.09160614013671875, + "step": 7125 + }, + { + "epoch": 0.48168176287684195, + "grad_norm": 0.7817666530609131, + "learning_rate": 1.6738237831429346e-05, + "loss": 0.1587982177734375, + "step": 7126 + }, + { + "epoch": 0.4817493578477761, + "grad_norm": 0.26407572627067566, + "learning_rate": 1.673496048841566e-05, + "loss": 0.06286811828613281, + "step": 7127 + }, + { + "epoch": 0.4818169528187103, + "grad_norm": 0.31737303733825684, + "learning_rate": 1.6731683061453797e-05, + "loss": 0.060302734375, + "step": 7128 + }, + { + "epoch": 0.48188454778964446, + "grad_norm": 0.29409605264663696, + "learning_rate": 1.6728405550702334e-05, + "loss": 0.06837081909179688, + "step": 7129 + }, + { + "epoch": 0.48195214276057863, + "grad_norm": 0.8039095401763916, + "learning_rate": 1.6725127956319863e-05, + "loss": 0.13730621337890625, + "step": 7130 + }, + { + "epoch": 0.4820197377315128, + "grad_norm": 1.1393814086914062, + "learning_rate": 1.6721850278464978e-05, + "loss": 0.1688995361328125, + "step": 7131 + }, + { + "epoch": 0.4820873327024469, + "grad_norm": 1.0191500186920166, + "learning_rate": 1.6718572517296265e-05, + "loss": 0.20599365234375, + "step": 7132 + }, + { + "epoch": 0.4821549276733811, + "grad_norm": 1.0158220529556274, + "learning_rate": 1.6715294672972324e-05, + "loss": 0.1371307373046875, + "step": 7133 + }, + { + "epoch": 0.48222252264431525, + "grad_norm": 1.263494849205017, + "learning_rate": 1.6712016745651763e-05, + "loss": 0.280792236328125, + "step": 7134 + }, + { + "epoch": 0.4822901176152494, + "grad_norm": 0.6689848303794861, + "learning_rate": 1.6708738735493188e-05, + "loss": 0.0923614501953125, + "step": 7135 + }, + { + "epoch": 0.4823577125861836, + "grad_norm": 0.516035795211792, + "learning_rate": 1.6705460642655202e-05, + "loss": 0.10387039184570312, + "step": 7136 + }, + { + "epoch": 0.48242530755711777, + "grad_norm": 0.2408766895532608, + "learning_rate": 1.6702182467296423e-05, + "loss": 0.04993438720703125, + "step": 7137 + }, + { + "epoch": 0.48249290252805194, + "grad_norm": 0.6272457838058472, + "learning_rate": 1.669890420957547e-05, + "loss": 0.116912841796875, + "step": 7138 + }, + { + "epoch": 0.48256049749898605, + "grad_norm": 0.8243281841278076, + "learning_rate": 1.6695625869650965e-05, + "loss": 0.1432037353515625, + "step": 7139 + }, + { + "epoch": 0.4826280924699202, + "grad_norm": 0.9051365256309509, + "learning_rate": 1.669234744768154e-05, + "loss": 0.15740203857421875, + "step": 7140 + }, + { + "epoch": 0.4826956874408544, + "grad_norm": 1.3829874992370605, + "learning_rate": 1.668906894382582e-05, + "loss": 0.193572998046875, + "step": 7141 + }, + { + "epoch": 0.48276328241178856, + "grad_norm": 0.6734040379524231, + "learning_rate": 1.668579035824244e-05, + "loss": 0.1232757568359375, + "step": 7142 + }, + { + "epoch": 0.48283087738272273, + "grad_norm": 0.5219576358795166, + "learning_rate": 1.6682511691090033e-05, + "loss": 0.0871429443359375, + "step": 7143 + }, + { + "epoch": 0.4828984723536569, + "grad_norm": 0.9471499919891357, + "learning_rate": 1.6679232942527254e-05, + "loss": 0.17205810546875, + "step": 7144 + }, + { + "epoch": 0.4829660673245911, + "grad_norm": 0.7706981897354126, + "learning_rate": 1.6675954112712738e-05, + "loss": 0.2177581787109375, + "step": 7145 + }, + { + "epoch": 0.4830336622955252, + "grad_norm": 0.4583047032356262, + "learning_rate": 1.6672675201805144e-05, + "loss": 0.111297607421875, + "step": 7146 + }, + { + "epoch": 0.48310125726645936, + "grad_norm": 0.19451895356178284, + "learning_rate": 1.666939620996312e-05, + "loss": 0.025360107421875, + "step": 7147 + }, + { + "epoch": 0.48316885223739353, + "grad_norm": 0.7076078057289124, + "learning_rate": 1.6666117137345325e-05, + "loss": 0.13409423828125, + "step": 7148 + }, + { + "epoch": 0.4832364472083277, + "grad_norm": 0.9335691928863525, + "learning_rate": 1.666283798411042e-05, + "loss": 0.15057373046875, + "step": 7149 + }, + { + "epoch": 0.4833040421792619, + "grad_norm": 0.9537009000778198, + "learning_rate": 1.6659558750417073e-05, + "loss": 0.184417724609375, + "step": 7150 + }, + { + "epoch": 0.48337163715019604, + "grad_norm": 0.539905309677124, + "learning_rate": 1.6656279436423952e-05, + "loss": 0.07758712768554688, + "step": 7151 + }, + { + "epoch": 0.4834392321211302, + "grad_norm": 0.47651639580726624, + "learning_rate": 1.6653000042289734e-05, + "loss": 0.06519317626953125, + "step": 7152 + }, + { + "epoch": 0.48350682709206433, + "grad_norm": 0.45091724395751953, + "learning_rate": 1.664972056817309e-05, + "loss": 0.118804931640625, + "step": 7153 + }, + { + "epoch": 0.4835744220629985, + "grad_norm": 1.0914713144302368, + "learning_rate": 1.6646441014232704e-05, + "loss": 0.1764678955078125, + "step": 7154 + }, + { + "epoch": 0.48364201703393267, + "grad_norm": 0.1902484893798828, + "learning_rate": 1.6643161380627272e-05, + "loss": 0.028036117553710938, + "step": 7155 + }, + { + "epoch": 0.48370961200486684, + "grad_norm": 0.7397187948226929, + "learning_rate": 1.6639881667515466e-05, + "loss": 0.108642578125, + "step": 7156 + }, + { + "epoch": 0.483777206975801, + "grad_norm": 0.6905274987220764, + "learning_rate": 1.663660187505599e-05, + "loss": 0.1285858154296875, + "step": 7157 + }, + { + "epoch": 0.4838448019467352, + "grad_norm": 1.0036289691925049, + "learning_rate": 1.6633322003407535e-05, + "loss": 0.15069580078125, + "step": 7158 + }, + { + "epoch": 0.48391239691766935, + "grad_norm": 1.0038787126541138, + "learning_rate": 1.6630042052728807e-05, + "loss": 0.172943115234375, + "step": 7159 + }, + { + "epoch": 0.48397999188860347, + "grad_norm": 0.5638059377670288, + "learning_rate": 1.6626762023178505e-05, + "loss": 0.1270599365234375, + "step": 7160 + }, + { + "epoch": 0.48404758685953764, + "grad_norm": 0.6767899394035339, + "learning_rate": 1.6623481914915342e-05, + "loss": 0.09767913818359375, + "step": 7161 + }, + { + "epoch": 0.4841151818304718, + "grad_norm": 0.6109246611595154, + "learning_rate": 1.6620201728098025e-05, + "loss": 0.10919189453125, + "step": 7162 + }, + { + "epoch": 0.484182776801406, + "grad_norm": 0.30942755937576294, + "learning_rate": 1.661692146288527e-05, + "loss": 0.044376373291015625, + "step": 7163 + }, + { + "epoch": 0.48425037177234015, + "grad_norm": 0.8928020596504211, + "learning_rate": 1.6613641119435805e-05, + "loss": 0.14080429077148438, + "step": 7164 + }, + { + "epoch": 0.4843179667432743, + "grad_norm": 1.1126946210861206, + "learning_rate": 1.6610360697908345e-05, + "loss": 0.209716796875, + "step": 7165 + }, + { + "epoch": 0.4843855617142085, + "grad_norm": 1.1349366903305054, + "learning_rate": 1.6607080198461623e-05, + "loss": 0.166290283203125, + "step": 7166 + }, + { + "epoch": 0.4844531566851426, + "grad_norm": 0.9455057978630066, + "learning_rate": 1.6603799621254356e-05, + "loss": 0.218994140625, + "step": 7167 + }, + { + "epoch": 0.4845207516560768, + "grad_norm": 1.0490330457687378, + "learning_rate": 1.6600518966445298e-05, + "loss": 0.13668060302734375, + "step": 7168 + }, + { + "epoch": 0.48458834662701095, + "grad_norm": 0.5695652961730957, + "learning_rate": 1.6597238234193183e-05, + "loss": 0.0833587646484375, + "step": 7169 + }, + { + "epoch": 0.4846559415979451, + "grad_norm": 0.25768280029296875, + "learning_rate": 1.659395742465674e-05, + "loss": 0.0313720703125, + "step": 7170 + }, + { + "epoch": 0.4847235365688793, + "grad_norm": 0.2703379690647125, + "learning_rate": 1.659067653799473e-05, + "loss": 0.03542327880859375, + "step": 7171 + }, + { + "epoch": 0.48479113153981346, + "grad_norm": 0.41311633586883545, + "learning_rate": 1.6587395574365892e-05, + "loss": 0.097625732421875, + "step": 7172 + }, + { + "epoch": 0.4848587265107476, + "grad_norm": 0.21898558735847473, + "learning_rate": 1.658411453392899e-05, + "loss": 0.03130054473876953, + "step": 7173 + }, + { + "epoch": 0.48492632148168174, + "grad_norm": 1.5366077423095703, + "learning_rate": 1.658083341684277e-05, + "loss": 0.17512893676757812, + "step": 7174 + }, + { + "epoch": 0.4849939164526159, + "grad_norm": 0.6569116711616516, + "learning_rate": 1.6577552223265997e-05, + "loss": 0.10540008544921875, + "step": 7175 + }, + { + "epoch": 0.4850615114235501, + "grad_norm": 0.29875993728637695, + "learning_rate": 1.6574270953357442e-05, + "loss": 0.05694580078125, + "step": 7176 + }, + { + "epoch": 0.48512910639448426, + "grad_norm": 0.7620000243186951, + "learning_rate": 1.6570989607275865e-05, + "loss": 0.172821044921875, + "step": 7177 + }, + { + "epoch": 0.4851967013654184, + "grad_norm": 0.3425627052783966, + "learning_rate": 1.6567708185180046e-05, + "loss": 0.073486328125, + "step": 7178 + }, + { + "epoch": 0.4852642963363526, + "grad_norm": 0.44281134009361267, + "learning_rate": 1.6564426687228748e-05, + "loss": 0.05402374267578125, + "step": 7179 + }, + { + "epoch": 0.4853318913072867, + "grad_norm": 0.3789082467556, + "learning_rate": 1.656114511358076e-05, + "loss": 0.05532073974609375, + "step": 7180 + }, + { + "epoch": 0.4853994862782209, + "grad_norm": 0.2625318467617035, + "learning_rate": 1.655786346439487e-05, + "loss": 0.06776046752929688, + "step": 7181 + }, + { + "epoch": 0.48546708124915505, + "grad_norm": 0.974777102470398, + "learning_rate": 1.655458173982985e-05, + "loss": 0.1594696044921875, + "step": 7182 + }, + { + "epoch": 0.4855346762200892, + "grad_norm": 0.2983405292034149, + "learning_rate": 1.6551299940044496e-05, + "loss": 0.036396026611328125, + "step": 7183 + }, + { + "epoch": 0.4856022711910234, + "grad_norm": 0.3490510880947113, + "learning_rate": 1.6548018065197607e-05, + "loss": 0.073822021484375, + "step": 7184 + }, + { + "epoch": 0.48566986616195756, + "grad_norm": 0.4313200116157532, + "learning_rate": 1.6544736115447978e-05, + "loss": 0.0783233642578125, + "step": 7185 + }, + { + "epoch": 0.48573746113289173, + "grad_norm": 0.7514928579330444, + "learning_rate": 1.654145409095441e-05, + "loss": 0.1428070068359375, + "step": 7186 + }, + { + "epoch": 0.48580505610382585, + "grad_norm": 1.0287673473358154, + "learning_rate": 1.65381719918757e-05, + "loss": 0.25665283203125, + "step": 7187 + }, + { + "epoch": 0.48587265107476, + "grad_norm": 0.9263238906860352, + "learning_rate": 1.6534889818370667e-05, + "loss": 0.1444854736328125, + "step": 7188 + }, + { + "epoch": 0.4859402460456942, + "grad_norm": 0.3219325542449951, + "learning_rate": 1.653160757059812e-05, + "loss": 0.07828521728515625, + "step": 7189 + }, + { + "epoch": 0.48600784101662836, + "grad_norm": 0.39869630336761475, + "learning_rate": 1.6528325248716874e-05, + "loss": 0.0833740234375, + "step": 7190 + }, + { + "epoch": 0.48607543598756253, + "grad_norm": 0.8324849009513855, + "learning_rate": 1.652504285288575e-05, + "loss": 0.14007568359375, + "step": 7191 + }, + { + "epoch": 0.4861430309584967, + "grad_norm": 0.5262117981910706, + "learning_rate": 1.6521760383263563e-05, + "loss": 0.0985107421875, + "step": 7192 + }, + { + "epoch": 0.4862106259294309, + "grad_norm": 0.33773770928382874, + "learning_rate": 1.6518477840009152e-05, + "loss": 0.0764312744140625, + "step": 7193 + }, + { + "epoch": 0.486278220900365, + "grad_norm": 0.6948614716529846, + "learning_rate": 1.6515195223281334e-05, + "loss": 0.1212310791015625, + "step": 7194 + }, + { + "epoch": 0.48634581587129916, + "grad_norm": 1.5476471185684204, + "learning_rate": 1.6511912533238953e-05, + "loss": 0.247406005859375, + "step": 7195 + }, + { + "epoch": 0.48641341084223333, + "grad_norm": 0.15770019590854645, + "learning_rate": 1.6508629770040833e-05, + "loss": 0.031337738037109375, + "step": 7196 + }, + { + "epoch": 0.4864810058131675, + "grad_norm": 0.6522020697593689, + "learning_rate": 1.650534693384583e-05, + "loss": 0.13739013671875, + "step": 7197 + }, + { + "epoch": 0.48654860078410167, + "grad_norm": 0.1963447779417038, + "learning_rate": 1.650206402481278e-05, + "loss": 0.038707733154296875, + "step": 7198 + }, + { + "epoch": 0.48661619575503584, + "grad_norm": 0.3475341498851776, + "learning_rate": 1.649878104310053e-05, + "loss": 0.08103179931640625, + "step": 7199 + }, + { + "epoch": 0.48668379072597, + "grad_norm": 0.8084296584129333, + "learning_rate": 1.6495497988867926e-05, + "loss": 0.10286712646484375, + "step": 7200 + }, + { + "epoch": 0.4867513856969041, + "grad_norm": 0.28268176317214966, + "learning_rate": 1.6492214862273837e-05, + "loss": 0.069671630859375, + "step": 7201 + }, + { + "epoch": 0.4868189806678383, + "grad_norm": 0.4078062176704407, + "learning_rate": 1.648893166347711e-05, + "loss": 0.06140899658203125, + "step": 7202 + }, + { + "epoch": 0.48688657563877247, + "grad_norm": 0.4113885462284088, + "learning_rate": 1.6485648392636615e-05, + "loss": 0.11175346374511719, + "step": 7203 + }, + { + "epoch": 0.48695417060970664, + "grad_norm": 0.7102387547492981, + "learning_rate": 1.6482365049911207e-05, + "loss": 0.16259765625, + "step": 7204 + }, + { + "epoch": 0.4870217655806408, + "grad_norm": 1.1026108264923096, + "learning_rate": 1.647908163545976e-05, + "loss": 0.196807861328125, + "step": 7205 + }, + { + "epoch": 0.487089360551575, + "grad_norm": 0.8854654431343079, + "learning_rate": 1.6475798149441142e-05, + "loss": 0.16121673583984375, + "step": 7206 + }, + { + "epoch": 0.48715695552250915, + "grad_norm": 0.5953736901283264, + "learning_rate": 1.6472514592014238e-05, + "loss": 0.1180572509765625, + "step": 7207 + }, + { + "epoch": 0.48722455049344326, + "grad_norm": 1.3305463790893555, + "learning_rate": 1.6469230963337913e-05, + "loss": 0.20538330078125, + "step": 7208 + }, + { + "epoch": 0.48729214546437744, + "grad_norm": 1.1496319770812988, + "learning_rate": 1.6465947263571062e-05, + "loss": 0.2110595703125, + "step": 7209 + }, + { + "epoch": 0.4873597404353116, + "grad_norm": 0.811286449432373, + "learning_rate": 1.6462663492872568e-05, + "loss": 0.193572998046875, + "step": 7210 + }, + { + "epoch": 0.4874273354062458, + "grad_norm": 0.8263372182846069, + "learning_rate": 1.6459379651401313e-05, + "loss": 0.1510162353515625, + "step": 7211 + }, + { + "epoch": 0.48749493037717995, + "grad_norm": 0.7184428572654724, + "learning_rate": 1.6456095739316194e-05, + "loss": 0.1440887451171875, + "step": 7212 + }, + { + "epoch": 0.4875625253481141, + "grad_norm": 1.1097813844680786, + "learning_rate": 1.6452811756776108e-05, + "loss": 0.199005126953125, + "step": 7213 + }, + { + "epoch": 0.4876301203190483, + "grad_norm": 0.5761427879333496, + "learning_rate": 1.6449527703939958e-05, + "loss": 0.1094970703125, + "step": 7214 + }, + { + "epoch": 0.4876977152899824, + "grad_norm": 0.7743194699287415, + "learning_rate": 1.644624358096664e-05, + "loss": 0.1298675537109375, + "step": 7215 + }, + { + "epoch": 0.4877653102609166, + "grad_norm": 0.4960884153842926, + "learning_rate": 1.6442959388015066e-05, + "loss": 0.11346435546875, + "step": 7216 + }, + { + "epoch": 0.48783290523185074, + "grad_norm": 0.29451921582221985, + "learning_rate": 1.643967512524414e-05, + "loss": 0.05597686767578125, + "step": 7217 + }, + { + "epoch": 0.4879005002027849, + "grad_norm": 0.38172680139541626, + "learning_rate": 1.6436390792812783e-05, + "loss": 0.08048248291015625, + "step": 7218 + }, + { + "epoch": 0.4879680951737191, + "grad_norm": 0.6098724007606506, + "learning_rate": 1.6433106390879905e-05, + "loss": 0.1342926025390625, + "step": 7219 + }, + { + "epoch": 0.48803569014465326, + "grad_norm": 1.1590780019760132, + "learning_rate": 1.6429821919604427e-05, + "loss": 0.19403076171875, + "step": 7220 + }, + { + "epoch": 0.4881032851155874, + "grad_norm": 0.5412377119064331, + "learning_rate": 1.642653737914527e-05, + "loss": 0.08603668212890625, + "step": 7221 + }, + { + "epoch": 0.48817088008652154, + "grad_norm": 0.45182445645332336, + "learning_rate": 1.6423252769661364e-05, + "loss": 0.06935882568359375, + "step": 7222 + }, + { + "epoch": 0.4882384750574557, + "grad_norm": 0.9848971366882324, + "learning_rate": 1.641996809131164e-05, + "loss": 0.1290435791015625, + "step": 7223 + }, + { + "epoch": 0.4883060700283899, + "grad_norm": 0.8229327201843262, + "learning_rate": 1.6416683344255025e-05, + "loss": 0.1269378662109375, + "step": 7224 + }, + { + "epoch": 0.48837366499932405, + "grad_norm": 0.8189663887023926, + "learning_rate": 1.6413398528650463e-05, + "loss": 0.14122772216796875, + "step": 7225 + }, + { + "epoch": 0.4884412599702582, + "grad_norm": 0.5675697326660156, + "learning_rate": 1.641011364465689e-05, + "loss": 0.10994720458984375, + "step": 7226 + }, + { + "epoch": 0.4885088549411924, + "grad_norm": 0.8506916165351868, + "learning_rate": 1.6406828692433246e-05, + "loss": 0.153045654296875, + "step": 7227 + }, + { + "epoch": 0.48857644991212656, + "grad_norm": 0.9565702080726624, + "learning_rate": 1.640354367213849e-05, + "loss": 0.225494384765625, + "step": 7228 + }, + { + "epoch": 0.4886440448830607, + "grad_norm": 0.3490528464317322, + "learning_rate": 1.6400258583931552e-05, + "loss": 0.07244110107421875, + "step": 7229 + }, + { + "epoch": 0.48871163985399485, + "grad_norm": 0.8087717294692993, + "learning_rate": 1.63969734279714e-05, + "loss": 0.1121368408203125, + "step": 7230 + }, + { + "epoch": 0.488779234824929, + "grad_norm": 0.782379686832428, + "learning_rate": 1.6393688204416988e-05, + "loss": 0.17254638671875, + "step": 7231 + }, + { + "epoch": 0.4888468297958632, + "grad_norm": 0.5921162962913513, + "learning_rate": 1.6390402913427273e-05, + "loss": 0.10572052001953125, + "step": 7232 + }, + { + "epoch": 0.48891442476679736, + "grad_norm": 0.9519817233085632, + "learning_rate": 1.6387117555161217e-05, + "loss": 0.1568603515625, + "step": 7233 + }, + { + "epoch": 0.48898201973773153, + "grad_norm": 0.49015137553215027, + "learning_rate": 1.6383832129777785e-05, + "loss": 0.10066986083984375, + "step": 7234 + }, + { + "epoch": 0.48904961470866565, + "grad_norm": 1.4326790571212769, + "learning_rate": 1.638054663743595e-05, + "loss": 0.25274658203125, + "step": 7235 + }, + { + "epoch": 0.4891172096795998, + "grad_norm": 0.8692492246627808, + "learning_rate": 1.6377261078294684e-05, + "loss": 0.14395904541015625, + "step": 7236 + }, + { + "epoch": 0.489184804650534, + "grad_norm": 0.8956289887428284, + "learning_rate": 1.6373975452512964e-05, + "loss": 0.13272857666015625, + "step": 7237 + }, + { + "epoch": 0.48925239962146816, + "grad_norm": 0.7287648916244507, + "learning_rate": 1.637068976024976e-05, + "loss": 0.125885009765625, + "step": 7238 + }, + { + "epoch": 0.48931999459240233, + "grad_norm": 0.9688509106636047, + "learning_rate": 1.6367404001664067e-05, + "loss": 0.200408935546875, + "step": 7239 + }, + { + "epoch": 0.4893875895633365, + "grad_norm": 0.1787312626838684, + "learning_rate": 1.6364118176914866e-05, + "loss": 0.02997589111328125, + "step": 7240 + }, + { + "epoch": 0.48945518453427067, + "grad_norm": 0.695336639881134, + "learning_rate": 1.6360832286161145e-05, + "loss": 0.12398529052734375, + "step": 7241 + }, + { + "epoch": 0.4895227795052048, + "grad_norm": 0.5546087026596069, + "learning_rate": 1.6357546329561888e-05, + "loss": 0.0993194580078125, + "step": 7242 + }, + { + "epoch": 0.48959037447613896, + "grad_norm": 0.5505721569061279, + "learning_rate": 1.6354260307276103e-05, + "loss": 0.09737396240234375, + "step": 7243 + }, + { + "epoch": 0.4896579694470731, + "grad_norm": 0.4825986921787262, + "learning_rate": 1.635097421946278e-05, + "loss": 0.06189727783203125, + "step": 7244 + }, + { + "epoch": 0.4897255644180073, + "grad_norm": 0.5005894303321838, + "learning_rate": 1.6347688066280924e-05, + "loss": 0.094268798828125, + "step": 7245 + }, + { + "epoch": 0.48979315938894147, + "grad_norm": 0.8126192092895508, + "learning_rate": 1.6344401847889533e-05, + "loss": 0.148834228515625, + "step": 7246 + }, + { + "epoch": 0.48986075435987564, + "grad_norm": 0.43198803067207336, + "learning_rate": 1.6341115564447624e-05, + "loss": 0.122650146484375, + "step": 7247 + }, + { + "epoch": 0.4899283493308098, + "grad_norm": 1.4055665731430054, + "learning_rate": 1.6337829216114203e-05, + "loss": 0.1735382080078125, + "step": 7248 + }, + { + "epoch": 0.4899959443017439, + "grad_norm": 0.21446824073791504, + "learning_rate": 1.6334542803048287e-05, + "loss": 0.037357330322265625, + "step": 7249 + }, + { + "epoch": 0.4900635392726781, + "grad_norm": 0.20237687230110168, + "learning_rate": 1.633125632540889e-05, + "loss": 0.0465545654296875, + "step": 7250 + }, + { + "epoch": 0.49013113424361227, + "grad_norm": 0.3492870628833771, + "learning_rate": 1.6327969783355032e-05, + "loss": 0.07537841796875, + "step": 7251 + }, + { + "epoch": 0.49019872921454644, + "grad_norm": 0.42894288897514343, + "learning_rate": 1.6324683177045736e-05, + "loss": 0.06429290771484375, + "step": 7252 + }, + { + "epoch": 0.4902663241854806, + "grad_norm": 0.548743486404419, + "learning_rate": 1.6321396506640033e-05, + "loss": 0.12191009521484375, + "step": 7253 + }, + { + "epoch": 0.4903339191564148, + "grad_norm": 0.24221894145011902, + "learning_rate": 1.6318109772296943e-05, + "loss": 0.042144775390625, + "step": 7254 + }, + { + "epoch": 0.49040151412734895, + "grad_norm": 0.3369283080101013, + "learning_rate": 1.631482297417551e-05, + "loss": 0.06838226318359375, + "step": 7255 + }, + { + "epoch": 0.49046910909828306, + "grad_norm": 0.2832658290863037, + "learning_rate": 1.6311536112434764e-05, + "loss": 0.0697784423828125, + "step": 7256 + }, + { + "epoch": 0.49053670406921723, + "grad_norm": 0.22035983204841614, + "learning_rate": 1.6308249187233744e-05, + "loss": 0.02834320068359375, + "step": 7257 + }, + { + "epoch": 0.4906042990401514, + "grad_norm": 0.8138015270233154, + "learning_rate": 1.6304962198731493e-05, + "loss": 0.152923583984375, + "step": 7258 + }, + { + "epoch": 0.4906718940110856, + "grad_norm": 0.3645390272140503, + "learning_rate": 1.6301675147087053e-05, + "loss": 0.083648681640625, + "step": 7259 + }, + { + "epoch": 0.49073948898201974, + "grad_norm": 0.3598008453845978, + "learning_rate": 1.629838803245948e-05, + "loss": 0.0900421142578125, + "step": 7260 + }, + { + "epoch": 0.4908070839529539, + "grad_norm": 0.3568342328071594, + "learning_rate": 1.6295100855007816e-05, + "loss": 0.0666046142578125, + "step": 7261 + }, + { + "epoch": 0.4908746789238881, + "grad_norm": 0.7893030047416687, + "learning_rate": 1.6291813614891118e-05, + "loss": 0.18668746948242188, + "step": 7262 + }, + { + "epoch": 0.4909422738948222, + "grad_norm": 0.19900712370872498, + "learning_rate": 1.6288526312268446e-05, + "loss": 0.026081085205078125, + "step": 7263 + }, + { + "epoch": 0.49100986886575637, + "grad_norm": 0.3488468825817108, + "learning_rate": 1.6285238947298855e-05, + "loss": 0.05405426025390625, + "step": 7264 + }, + { + "epoch": 0.49107746383669054, + "grad_norm": 0.6479608416557312, + "learning_rate": 1.6281951520141412e-05, + "loss": 0.118743896484375, + "step": 7265 + }, + { + "epoch": 0.4911450588076247, + "grad_norm": 0.30771878361701965, + "learning_rate": 1.627866403095518e-05, + "loss": 0.0465850830078125, + "step": 7266 + }, + { + "epoch": 0.4912126537785589, + "grad_norm": 0.8059490323066711, + "learning_rate": 1.6275376479899233e-05, + "loss": 0.08259773254394531, + "step": 7267 + }, + { + "epoch": 0.49128024874949305, + "grad_norm": 0.32663974165916443, + "learning_rate": 1.627208886713264e-05, + "loss": 0.05184173583984375, + "step": 7268 + }, + { + "epoch": 0.4913478437204272, + "grad_norm": 0.2776538133621216, + "learning_rate": 1.6268801192814476e-05, + "loss": 0.05513763427734375, + "step": 7269 + }, + { + "epoch": 0.49141543869136134, + "grad_norm": 0.9182288646697998, + "learning_rate": 1.6265513457103818e-05, + "loss": 0.11956787109375, + "step": 7270 + }, + { + "epoch": 0.4914830336622955, + "grad_norm": 1.7467576265335083, + "learning_rate": 1.626222566015975e-05, + "loss": 0.22808837890625, + "step": 7271 + }, + { + "epoch": 0.4915506286332297, + "grad_norm": 1.1545250415802002, + "learning_rate": 1.6258937802141355e-05, + "loss": 0.182830810546875, + "step": 7272 + }, + { + "epoch": 0.49161822360416385, + "grad_norm": 0.7934495210647583, + "learning_rate": 1.625564988320772e-05, + "loss": 0.1594085693359375, + "step": 7273 + }, + { + "epoch": 0.491685818575098, + "grad_norm": 0.8264339566230774, + "learning_rate": 1.6252361903517936e-05, + "loss": 0.14007568359375, + "step": 7274 + }, + { + "epoch": 0.4917534135460322, + "grad_norm": 0.4351367950439453, + "learning_rate": 1.6249073863231093e-05, + "loss": 0.06536865234375, + "step": 7275 + }, + { + "epoch": 0.49182100851696636, + "grad_norm": 0.6170986294746399, + "learning_rate": 1.624578576250629e-05, + "loss": 0.1391448974609375, + "step": 7276 + }, + { + "epoch": 0.4918886034879005, + "grad_norm": 0.4577484130859375, + "learning_rate": 1.6242497601502627e-05, + "loss": 0.05426788330078125, + "step": 7277 + }, + { + "epoch": 0.49195619845883465, + "grad_norm": 0.3286694288253784, + "learning_rate": 1.62392093803792e-05, + "loss": 0.06793212890625, + "step": 7278 + }, + { + "epoch": 0.4920237934297688, + "grad_norm": 0.1989009827375412, + "learning_rate": 1.6235921099295117e-05, + "loss": 0.05401611328125, + "step": 7279 + }, + { + "epoch": 0.492091388400703, + "grad_norm": 0.2061288207769394, + "learning_rate": 1.6232632758409484e-05, + "loss": 0.044017791748046875, + "step": 7280 + }, + { + "epoch": 0.49215898337163716, + "grad_norm": 0.76969313621521, + "learning_rate": 1.6229344357881413e-05, + "loss": 0.191375732421875, + "step": 7281 + }, + { + "epoch": 0.49222657834257133, + "grad_norm": 0.3890742361545563, + "learning_rate": 1.6226055897870015e-05, + "loss": 0.07394981384277344, + "step": 7282 + }, + { + "epoch": 0.4922941733135055, + "grad_norm": 0.2676253318786621, + "learning_rate": 1.6222767378534413e-05, + "loss": 0.046051025390625, + "step": 7283 + }, + { + "epoch": 0.4923617682844396, + "grad_norm": 0.1719743013381958, + "learning_rate": 1.6219478800033714e-05, + "loss": 0.025936126708984375, + "step": 7284 + }, + { + "epoch": 0.4924293632553738, + "grad_norm": 0.3461783230304718, + "learning_rate": 1.621619016252705e-05, + "loss": 0.06463623046875, + "step": 7285 + }, + { + "epoch": 0.49249695822630796, + "grad_norm": 1.6086863279342651, + "learning_rate": 1.621290146617355e-05, + "loss": 0.250244140625, + "step": 7286 + }, + { + "epoch": 0.4925645531972421, + "grad_norm": 0.5730647444725037, + "learning_rate": 1.6209612711132326e-05, + "loss": 0.119964599609375, + "step": 7287 + }, + { + "epoch": 0.4926321481681763, + "grad_norm": 0.9195044636726379, + "learning_rate": 1.6206323897562515e-05, + "loss": 0.239410400390625, + "step": 7288 + }, + { + "epoch": 0.49269974313911047, + "grad_norm": 0.24567072093486786, + "learning_rate": 1.6203035025623254e-05, + "loss": 0.052600860595703125, + "step": 7289 + }, + { + "epoch": 0.49276733811004464, + "grad_norm": 0.27706220746040344, + "learning_rate": 1.6199746095473677e-05, + "loss": 0.044793128967285156, + "step": 7290 + }, + { + "epoch": 0.49283493308097875, + "grad_norm": 0.3209792673587799, + "learning_rate": 1.6196457107272924e-05, + "loss": 0.049591064453125, + "step": 7291 + }, + { + "epoch": 0.4929025280519129, + "grad_norm": 0.41085925698280334, + "learning_rate": 1.619316806118013e-05, + "loss": 0.09418869018554688, + "step": 7292 + }, + { + "epoch": 0.4929701230228471, + "grad_norm": 0.31481823325157166, + "learning_rate": 1.6189878957354454e-05, + "loss": 0.07466888427734375, + "step": 7293 + }, + { + "epoch": 0.49303771799378127, + "grad_norm": 0.2426975965499878, + "learning_rate": 1.618658979595503e-05, + "loss": 0.06000518798828125, + "step": 7294 + }, + { + "epoch": 0.49310531296471544, + "grad_norm": 0.5753142237663269, + "learning_rate": 1.618330057714101e-05, + "loss": 0.1319122314453125, + "step": 7295 + }, + { + "epoch": 0.4931729079356496, + "grad_norm": 0.560821533203125, + "learning_rate": 1.6180011301071553e-05, + "loss": 0.1240386962890625, + "step": 7296 + }, + { + "epoch": 0.4932405029065838, + "grad_norm": 0.3135623037815094, + "learning_rate": 1.6176721967905807e-05, + "loss": 0.04611396789550781, + "step": 7297 + }, + { + "epoch": 0.4933080978775179, + "grad_norm": 0.6785950064659119, + "learning_rate": 1.617343257780294e-05, + "loss": 0.10742568969726562, + "step": 7298 + }, + { + "epoch": 0.49337569284845206, + "grad_norm": 0.5586270689964294, + "learning_rate": 1.6170143130922102e-05, + "loss": 0.14617919921875, + "step": 7299 + }, + { + "epoch": 0.49344328781938623, + "grad_norm": 1.145000696182251, + "learning_rate": 1.616685362742246e-05, + "loss": 0.1791839599609375, + "step": 7300 + }, + { + "epoch": 0.4935108827903204, + "grad_norm": 0.5610786080360413, + "learning_rate": 1.616356406746319e-05, + "loss": 0.1209869384765625, + "step": 7301 + }, + { + "epoch": 0.4935784777612546, + "grad_norm": 0.7501962184906006, + "learning_rate": 1.6160274451203454e-05, + "loss": 0.1986083984375, + "step": 7302 + }, + { + "epoch": 0.49364607273218875, + "grad_norm": 0.5325348377227783, + "learning_rate": 1.615698477880242e-05, + "loss": 0.09600448608398438, + "step": 7303 + }, + { + "epoch": 0.49371366770312286, + "grad_norm": 0.3176295757293701, + "learning_rate": 1.615369505041927e-05, + "loss": 0.055908203125, + "step": 7304 + }, + { + "epoch": 0.49378126267405703, + "grad_norm": 0.9277814030647278, + "learning_rate": 1.6150405266213176e-05, + "loss": 0.1529541015625, + "step": 7305 + }, + { + "epoch": 0.4938488576449912, + "grad_norm": 0.3154785633087158, + "learning_rate": 1.6147115426343322e-05, + "loss": 0.048976898193359375, + "step": 7306 + }, + { + "epoch": 0.49391645261592537, + "grad_norm": 1.0402928590774536, + "learning_rate": 1.6143825530968893e-05, + "loss": 0.214752197265625, + "step": 7307 + }, + { + "epoch": 0.49398404758685954, + "grad_norm": 0.7720203995704651, + "learning_rate": 1.614053558024907e-05, + "loss": 0.144439697265625, + "step": 7308 + }, + { + "epoch": 0.4940516425577937, + "grad_norm": 0.3444816470146179, + "learning_rate": 1.6137245574343035e-05, + "loss": 0.09273529052734375, + "step": 7309 + }, + { + "epoch": 0.4941192375287279, + "grad_norm": 0.6656085252761841, + "learning_rate": 1.613395551341e-05, + "loss": 0.130889892578125, + "step": 7310 + }, + { + "epoch": 0.494186832499662, + "grad_norm": 1.2385845184326172, + "learning_rate": 1.6130665397609136e-05, + "loss": 0.1898040771484375, + "step": 7311 + }, + { + "epoch": 0.49425442747059617, + "grad_norm": 0.8097047209739685, + "learning_rate": 1.6127375227099653e-05, + "loss": 0.1886138916015625, + "step": 7312 + }, + { + "epoch": 0.49432202244153034, + "grad_norm": 0.981515645980835, + "learning_rate": 1.612408500204074e-05, + "loss": 0.23944091796875, + "step": 7313 + }, + { + "epoch": 0.4943896174124645, + "grad_norm": 0.20753231644630432, + "learning_rate": 1.612079472259161e-05, + "loss": 0.034183502197265625, + "step": 7314 + }, + { + "epoch": 0.4944572123833987, + "grad_norm": 0.3943125903606415, + "learning_rate": 1.6117504388911458e-05, + "loss": 0.08153533935546875, + "step": 7315 + }, + { + "epoch": 0.49452480735433285, + "grad_norm": 0.3998587727546692, + "learning_rate": 1.6114214001159494e-05, + "loss": 0.07596588134765625, + "step": 7316 + }, + { + "epoch": 0.494592402325267, + "grad_norm": 0.5945999622344971, + "learning_rate": 1.6110923559494926e-05, + "loss": 0.09439849853515625, + "step": 7317 + }, + { + "epoch": 0.49465999729620114, + "grad_norm": 0.3065897524356842, + "learning_rate": 1.610763306407697e-05, + "loss": 0.050418853759765625, + "step": 7318 + }, + { + "epoch": 0.4947275922671353, + "grad_norm": 0.4185808598995209, + "learning_rate": 1.6104342515064837e-05, + "loss": 0.0989532470703125, + "step": 7319 + }, + { + "epoch": 0.4947951872380695, + "grad_norm": 0.966991126537323, + "learning_rate": 1.6101051912617746e-05, + "loss": 0.214447021484375, + "step": 7320 + }, + { + "epoch": 0.49486278220900365, + "grad_norm": 0.5047387480735779, + "learning_rate": 1.609776125689492e-05, + "loss": 0.096649169921875, + "step": 7321 + }, + { + "epoch": 0.4949303771799378, + "grad_norm": 0.9226104617118835, + "learning_rate": 1.609447054805557e-05, + "loss": 0.1742401123046875, + "step": 7322 + }, + { + "epoch": 0.494997972150872, + "grad_norm": 0.5007895231246948, + "learning_rate": 1.6091179786258935e-05, + "loss": 0.10952377319335938, + "step": 7323 + }, + { + "epoch": 0.49506556712180616, + "grad_norm": 0.42784392833709717, + "learning_rate": 1.6087888971664233e-05, + "loss": 0.0474700927734375, + "step": 7324 + }, + { + "epoch": 0.4951331620927403, + "grad_norm": 0.8190847635269165, + "learning_rate": 1.6084598104430695e-05, + "loss": 0.149200439453125, + "step": 7325 + }, + { + "epoch": 0.49520075706367445, + "grad_norm": 0.732704758644104, + "learning_rate": 1.6081307184717554e-05, + "loss": 0.1703948974609375, + "step": 7326 + }, + { + "epoch": 0.4952683520346086, + "grad_norm": 0.9869948625564575, + "learning_rate": 1.607801621268405e-05, + "loss": 0.1533355712890625, + "step": 7327 + }, + { + "epoch": 0.4953359470055428, + "grad_norm": 1.1237223148345947, + "learning_rate": 1.607472518848942e-05, + "loss": 0.1526336669921875, + "step": 7328 + }, + { + "epoch": 0.49540354197647696, + "grad_norm": 0.683488667011261, + "learning_rate": 1.60714341122929e-05, + "loss": 0.12457275390625, + "step": 7329 + }, + { + "epoch": 0.49547113694741113, + "grad_norm": 0.33233070373535156, + "learning_rate": 1.606814298425373e-05, + "loss": 0.0566864013671875, + "step": 7330 + }, + { + "epoch": 0.4955387319183453, + "grad_norm": 1.0301941633224487, + "learning_rate": 1.6064851804531167e-05, + "loss": 0.2032470703125, + "step": 7331 + }, + { + "epoch": 0.4956063268892794, + "grad_norm": 0.3935468792915344, + "learning_rate": 1.6061560573284448e-05, + "loss": 0.100921630859375, + "step": 7332 + }, + { + "epoch": 0.4956739218602136, + "grad_norm": 0.21315374970436096, + "learning_rate": 1.6058269290672827e-05, + "loss": 0.020212173461914062, + "step": 7333 + }, + { + "epoch": 0.49574151683114775, + "grad_norm": 0.5079295635223389, + "learning_rate": 1.6054977956855555e-05, + "loss": 0.11810302734375, + "step": 7334 + }, + { + "epoch": 0.4958091118020819, + "grad_norm": 0.27873048186302185, + "learning_rate": 1.605168657199189e-05, + "loss": 0.053955078125, + "step": 7335 + }, + { + "epoch": 0.4958767067730161, + "grad_norm": 1.6466593742370605, + "learning_rate": 1.604839513624109e-05, + "loss": 0.2264556884765625, + "step": 7336 + }, + { + "epoch": 0.49594430174395027, + "grad_norm": 0.3592066168785095, + "learning_rate": 1.604510364976241e-05, + "loss": 0.082550048828125, + "step": 7337 + }, + { + "epoch": 0.49601189671488444, + "grad_norm": 0.413329154253006, + "learning_rate": 1.6041812112715114e-05, + "loss": 0.07808685302734375, + "step": 7338 + }, + { + "epoch": 0.49607949168581855, + "grad_norm": 0.7251843214035034, + "learning_rate": 1.6038520525258477e-05, + "loss": 0.1182403564453125, + "step": 7339 + }, + { + "epoch": 0.4961470866567527, + "grad_norm": 0.41644132137298584, + "learning_rate": 1.6035228887551756e-05, + "loss": 0.0654754638671875, + "step": 7340 + }, + { + "epoch": 0.4962146816276869, + "grad_norm": 0.7317934036254883, + "learning_rate": 1.603193719975422e-05, + "loss": 0.140655517578125, + "step": 7341 + }, + { + "epoch": 0.49628227659862106, + "grad_norm": 1.6079161167144775, + "learning_rate": 1.6028645462025147e-05, + "loss": 0.24407958984375, + "step": 7342 + }, + { + "epoch": 0.49634987156955523, + "grad_norm": 0.3380449414253235, + "learning_rate": 1.6025353674523804e-05, + "loss": 0.053920745849609375, + "step": 7343 + }, + { + "epoch": 0.4964174665404894, + "grad_norm": 0.4480196237564087, + "learning_rate": 1.602206183740948e-05, + "loss": 0.0960845947265625, + "step": 7344 + }, + { + "epoch": 0.4964850615114236, + "grad_norm": 0.9680476188659668, + "learning_rate": 1.6018769950841448e-05, + "loss": 0.1584014892578125, + "step": 7345 + }, + { + "epoch": 0.4965526564823577, + "grad_norm": 0.8778985738754272, + "learning_rate": 1.6015478014978987e-05, + "loss": 0.213165283203125, + "step": 7346 + }, + { + "epoch": 0.49662025145329186, + "grad_norm": 1.1865267753601074, + "learning_rate": 1.601218602998139e-05, + "loss": 0.173919677734375, + "step": 7347 + }, + { + "epoch": 0.49668784642422603, + "grad_norm": 0.21345815062522888, + "learning_rate": 1.6008893996007932e-05, + "loss": 0.04718017578125, + "step": 7348 + }, + { + "epoch": 0.4967554413951602, + "grad_norm": 0.26677772402763367, + "learning_rate": 1.600560191321791e-05, + "loss": 0.05596923828125, + "step": 7349 + }, + { + "epoch": 0.4968230363660944, + "grad_norm": 0.6435859799385071, + "learning_rate": 1.6002309781770616e-05, + "loss": 0.1385498046875, + "step": 7350 + }, + { + "epoch": 0.49689063133702854, + "grad_norm": 0.8879885673522949, + "learning_rate": 1.599901760182534e-05, + "loss": 0.214569091796875, + "step": 7351 + }, + { + "epoch": 0.4969582263079627, + "grad_norm": 1.1820436716079712, + "learning_rate": 1.5995725373541376e-05, + "loss": 0.191680908203125, + "step": 7352 + }, + { + "epoch": 0.49702582127889683, + "grad_norm": 0.753150999546051, + "learning_rate": 1.599243309707803e-05, + "loss": 0.1554412841796875, + "step": 7353 + }, + { + "epoch": 0.497093416249831, + "grad_norm": 0.30036380887031555, + "learning_rate": 1.59891407725946e-05, + "loss": 0.0473175048828125, + "step": 7354 + }, + { + "epoch": 0.49716101122076517, + "grad_norm": 0.3352428674697876, + "learning_rate": 1.5985848400250383e-05, + "loss": 0.05266571044921875, + "step": 7355 + }, + { + "epoch": 0.49722860619169934, + "grad_norm": 0.6587454676628113, + "learning_rate": 1.5982555980204692e-05, + "loss": 0.1428680419921875, + "step": 7356 + }, + { + "epoch": 0.4972962011626335, + "grad_norm": 0.6489396691322327, + "learning_rate": 1.5979263512616835e-05, + "loss": 0.08726310729980469, + "step": 7357 + }, + { + "epoch": 0.4973637961335677, + "grad_norm": 0.8287841081619263, + "learning_rate": 1.5975970997646117e-05, + "loss": 0.13129806518554688, + "step": 7358 + }, + { + "epoch": 0.49743139110450185, + "grad_norm": 0.933113694190979, + "learning_rate": 1.597267843545185e-05, + "loss": 0.2216796875, + "step": 7359 + }, + { + "epoch": 0.49749898607543597, + "grad_norm": 0.5388460159301758, + "learning_rate": 1.5969385826193357e-05, + "loss": 0.13714599609375, + "step": 7360 + }, + { + "epoch": 0.49756658104637014, + "grad_norm": 0.6443063616752625, + "learning_rate": 1.5966093170029945e-05, + "loss": 0.10626983642578125, + "step": 7361 + }, + { + "epoch": 0.4976341760173043, + "grad_norm": 0.6895450353622437, + "learning_rate": 1.5962800467120943e-05, + "loss": 0.12967681884765625, + "step": 7362 + }, + { + "epoch": 0.4977017709882385, + "grad_norm": 0.5169270634651184, + "learning_rate": 1.595950771762566e-05, + "loss": 0.07779693603515625, + "step": 7363 + }, + { + "epoch": 0.49776936595917265, + "grad_norm": 0.7824186682701111, + "learning_rate": 1.5956214921703424e-05, + "loss": 0.135955810546875, + "step": 7364 + }, + { + "epoch": 0.4978369609301068, + "grad_norm": 0.3740943670272827, + "learning_rate": 1.5952922079513573e-05, + "loss": 0.061634063720703125, + "step": 7365 + }, + { + "epoch": 0.49790455590104093, + "grad_norm": 0.49501872062683105, + "learning_rate": 1.5949629191215418e-05, + "loss": 0.11700439453125, + "step": 7366 + }, + { + "epoch": 0.4979721508719751, + "grad_norm": 0.6625795960426331, + "learning_rate": 1.5946336256968304e-05, + "loss": 0.154754638671875, + "step": 7367 + }, + { + "epoch": 0.4980397458429093, + "grad_norm": 0.35250115394592285, + "learning_rate": 1.594304327693155e-05, + "loss": 0.04748725891113281, + "step": 7368 + }, + { + "epoch": 0.49810734081384345, + "grad_norm": 0.3176914155483246, + "learning_rate": 1.59397502512645e-05, + "loss": 0.07391357421875, + "step": 7369 + }, + { + "epoch": 0.4981749357847776, + "grad_norm": 0.39057883620262146, + "learning_rate": 1.593645718012649e-05, + "loss": 0.07590484619140625, + "step": 7370 + }, + { + "epoch": 0.4982425307557118, + "grad_norm": 0.3699477016925812, + "learning_rate": 1.5933164063676857e-05, + "loss": 0.036235809326171875, + "step": 7371 + }, + { + "epoch": 0.49831012572664596, + "grad_norm": 0.19539444148540497, + "learning_rate": 1.592987090207494e-05, + "loss": 0.03155517578125, + "step": 7372 + }, + { + "epoch": 0.4983777206975801, + "grad_norm": 1.6342215538024902, + "learning_rate": 1.5926577695480093e-05, + "loss": 0.2040557861328125, + "step": 7373 + }, + { + "epoch": 0.49844531566851424, + "grad_norm": 0.6096976399421692, + "learning_rate": 1.592328444405165e-05, + "loss": 0.10432815551757812, + "step": 7374 + }, + { + "epoch": 0.4985129106394484, + "grad_norm": 0.36664721369743347, + "learning_rate": 1.5919991147948963e-05, + "loss": 0.06401824951171875, + "step": 7375 + }, + { + "epoch": 0.4985805056103826, + "grad_norm": 0.8512067794799805, + "learning_rate": 1.5916697807331383e-05, + "loss": 0.14100265502929688, + "step": 7376 + }, + { + "epoch": 0.49864810058131676, + "grad_norm": 0.7571427822113037, + "learning_rate": 1.5913404422358265e-05, + "loss": 0.11344146728515625, + "step": 7377 + }, + { + "epoch": 0.4987156955522509, + "grad_norm": 0.3142307996749878, + "learning_rate": 1.591011099318896e-05, + "loss": 0.06717300415039062, + "step": 7378 + }, + { + "epoch": 0.4987832905231851, + "grad_norm": 0.7091249823570251, + "learning_rate": 1.5906817519982825e-05, + "loss": 0.11651611328125, + "step": 7379 + }, + { + "epoch": 0.4988508854941192, + "grad_norm": 0.35436445474624634, + "learning_rate": 1.5903524002899218e-05, + "loss": 0.06916046142578125, + "step": 7380 + }, + { + "epoch": 0.4989184804650534, + "grad_norm": 0.2175293117761612, + "learning_rate": 1.5900230442097503e-05, + "loss": 0.03337287902832031, + "step": 7381 + }, + { + "epoch": 0.49898607543598755, + "grad_norm": 0.9054408073425293, + "learning_rate": 1.5896936837737037e-05, + "loss": 0.13300323486328125, + "step": 7382 + }, + { + "epoch": 0.4990536704069217, + "grad_norm": 0.5573973059654236, + "learning_rate": 1.5893643189977192e-05, + "loss": 0.09319305419921875, + "step": 7383 + }, + { + "epoch": 0.4991212653778559, + "grad_norm": 0.7591274976730347, + "learning_rate": 1.5890349498977328e-05, + "loss": 0.1571807861328125, + "step": 7384 + }, + { + "epoch": 0.49918886034879006, + "grad_norm": 0.46400776505470276, + "learning_rate": 1.5887055764896822e-05, + "loss": 0.07436370849609375, + "step": 7385 + }, + { + "epoch": 0.49925645531972423, + "grad_norm": 0.9811186194419861, + "learning_rate": 1.588376198789504e-05, + "loss": 0.1312255859375, + "step": 7386 + }, + { + "epoch": 0.49932405029065835, + "grad_norm": 0.22094140946865082, + "learning_rate": 1.5880468168131358e-05, + "loss": 0.024492263793945312, + "step": 7387 + }, + { + "epoch": 0.4993916452615925, + "grad_norm": 0.7495774626731873, + "learning_rate": 1.587717430576515e-05, + "loss": 0.143890380859375, + "step": 7388 + }, + { + "epoch": 0.4994592402325267, + "grad_norm": 0.8225517868995667, + "learning_rate": 1.587388040095579e-05, + "loss": 0.1862640380859375, + "step": 7389 + }, + { + "epoch": 0.49952683520346086, + "grad_norm": 1.0419020652770996, + "learning_rate": 1.5870586453862668e-05, + "loss": 0.1445159912109375, + "step": 7390 + }, + { + "epoch": 0.49959443017439503, + "grad_norm": 0.373029500246048, + "learning_rate": 1.5867292464645156e-05, + "loss": 0.06651687622070312, + "step": 7391 + }, + { + "epoch": 0.4996620251453292, + "grad_norm": 0.8593101501464844, + "learning_rate": 1.5863998433462643e-05, + "loss": 0.1443634033203125, + "step": 7392 + }, + { + "epoch": 0.4997296201162634, + "grad_norm": 0.8133074641227722, + "learning_rate": 1.586070436047451e-05, + "loss": 0.11502265930175781, + "step": 7393 + }, + { + "epoch": 0.4997972150871975, + "grad_norm": 0.18497666716575623, + "learning_rate": 1.585741024584015e-05, + "loss": 0.02713775634765625, + "step": 7394 + }, + { + "epoch": 0.49986481005813166, + "grad_norm": 0.3452150523662567, + "learning_rate": 1.585411608971895e-05, + "loss": 0.06134796142578125, + "step": 7395 + }, + { + "epoch": 0.49993240502906583, + "grad_norm": 0.5006141662597656, + "learning_rate": 1.5850821892270302e-05, + "loss": 0.07978057861328125, + "step": 7396 + }, + { + "epoch": 0.5, + "grad_norm": 0.7661518454551697, + "learning_rate": 1.5847527653653595e-05, + "loss": 0.13242340087890625, + "step": 7397 + }, + { + "epoch": 0.5000675949709341, + "grad_norm": 0.21157489717006683, + "learning_rate": 1.584423337402823e-05, + "loss": 0.043426513671875, + "step": 7398 + }, + { + "epoch": 0.5001351899418683, + "grad_norm": 0.6641184091567993, + "learning_rate": 1.584093905355361e-05, + "loss": 0.15289306640625, + "step": 7399 + }, + { + "epoch": 0.5002027849128025, + "grad_norm": 0.41656962037086487, + "learning_rate": 1.583764469238913e-05, + "loss": 0.07480621337890625, + "step": 7400 + }, + { + "epoch": 0.5002703798837367, + "grad_norm": 0.5629693269729614, + "learning_rate": 1.583435029069418e-05, + "loss": 0.159149169921875, + "step": 7401 + }, + { + "epoch": 0.5003379748546708, + "grad_norm": 0.379846453666687, + "learning_rate": 1.583105584862818e-05, + "loss": 0.0506744384765625, + "step": 7402 + }, + { + "epoch": 0.500405569825605, + "grad_norm": 0.9639049172401428, + "learning_rate": 1.5827761366350536e-05, + "loss": 0.130035400390625, + "step": 7403 + }, + { + "epoch": 0.5004731647965391, + "grad_norm": 0.7946786880493164, + "learning_rate": 1.5824466844020645e-05, + "loss": 0.14154052734375, + "step": 7404 + }, + { + "epoch": 0.5005407597674733, + "grad_norm": 0.9738809466362, + "learning_rate": 1.5821172281797914e-05, + "loss": 0.233551025390625, + "step": 7405 + }, + { + "epoch": 0.5006083547384075, + "grad_norm": 0.20811201632022858, + "learning_rate": 1.581787767984177e-05, + "loss": 0.035053253173828125, + "step": 7406 + }, + { + "epoch": 0.5006759497093416, + "grad_norm": 0.26287078857421875, + "learning_rate": 1.581458303831161e-05, + "loss": 0.06510162353515625, + "step": 7407 + }, + { + "epoch": 0.5007435446802758, + "grad_norm": 0.39229196310043335, + "learning_rate": 1.5811288357366862e-05, + "loss": 0.07151412963867188, + "step": 7408 + }, + { + "epoch": 0.5008111396512099, + "grad_norm": 1.0944080352783203, + "learning_rate": 1.5807993637166937e-05, + "loss": 0.2037506103515625, + "step": 7409 + }, + { + "epoch": 0.5008787346221442, + "grad_norm": 0.8127236366271973, + "learning_rate": 1.5804698877871254e-05, + "loss": 0.1730194091796875, + "step": 7410 + }, + { + "epoch": 0.5009463295930783, + "grad_norm": 0.8600059151649475, + "learning_rate": 1.5801404079639236e-05, + "loss": 0.12726593017578125, + "step": 7411 + }, + { + "epoch": 0.5010139245640124, + "grad_norm": 0.6210471987724304, + "learning_rate": 1.5798109242630306e-05, + "loss": 0.1240234375, + "step": 7412 + }, + { + "epoch": 0.5010815195349466, + "grad_norm": 0.9127257466316223, + "learning_rate": 1.5794814367003884e-05, + "loss": 0.162567138671875, + "step": 7413 + }, + { + "epoch": 0.5011491145058807, + "grad_norm": 1.1473881006240845, + "learning_rate": 1.5791519452919398e-05, + "loss": 0.194915771484375, + "step": 7414 + }, + { + "epoch": 0.501216709476815, + "grad_norm": 0.48945990204811096, + "learning_rate": 1.5788224500536286e-05, + "loss": 0.10760498046875, + "step": 7415 + }, + { + "epoch": 0.5012843044477491, + "grad_norm": 0.2832997441291809, + "learning_rate": 1.578492951001397e-05, + "loss": 0.04301261901855469, + "step": 7416 + }, + { + "epoch": 0.5013518994186833, + "grad_norm": 0.6838555932044983, + "learning_rate": 1.5781634481511882e-05, + "loss": 0.11081695556640625, + "step": 7417 + }, + { + "epoch": 0.5014194943896174, + "grad_norm": 0.2994546592235565, + "learning_rate": 1.577833941518945e-05, + "loss": 0.09369659423828125, + "step": 7418 + }, + { + "epoch": 0.5014870893605515, + "grad_norm": 1.016229510307312, + "learning_rate": 1.577504431120612e-05, + "loss": 0.19769287109375, + "step": 7419 + }, + { + "epoch": 0.5015546843314858, + "grad_norm": 1.2522177696228027, + "learning_rate": 1.5771749169721327e-05, + "loss": 0.205352783203125, + "step": 7420 + }, + { + "epoch": 0.5016222793024199, + "grad_norm": 0.22990892827510834, + "learning_rate": 1.5768453990894515e-05, + "loss": 0.0447540283203125, + "step": 7421 + }, + { + "epoch": 0.5016898742733541, + "grad_norm": 0.4477519094944, + "learning_rate": 1.5765158774885108e-05, + "loss": 0.1094207763671875, + "step": 7422 + }, + { + "epoch": 0.5017574692442882, + "grad_norm": 0.35765504837036133, + "learning_rate": 1.576186352185257e-05, + "loss": 0.05322074890136719, + "step": 7423 + }, + { + "epoch": 0.5018250642152224, + "grad_norm": 0.981070339679718, + "learning_rate": 1.5758568231956338e-05, + "loss": 0.1854248046875, + "step": 7424 + }, + { + "epoch": 0.5018926591861566, + "grad_norm": 0.3613397181034088, + "learning_rate": 1.5755272905355853e-05, + "loss": 0.0740203857421875, + "step": 7425 + }, + { + "epoch": 0.5019602541570907, + "grad_norm": 0.311381071805954, + "learning_rate": 1.575197754221057e-05, + "loss": 0.0649261474609375, + "step": 7426 + }, + { + "epoch": 0.5020278491280249, + "grad_norm": 0.18188738822937012, + "learning_rate": 1.5748682142679937e-05, + "loss": 0.033451080322265625, + "step": 7427 + }, + { + "epoch": 0.502095444098959, + "grad_norm": 0.185340017080307, + "learning_rate": 1.5745386706923406e-05, + "loss": 0.027538299560546875, + "step": 7428 + }, + { + "epoch": 0.5021630390698932, + "grad_norm": 0.37598663568496704, + "learning_rate": 1.5742091235100434e-05, + "loss": 0.05768585205078125, + "step": 7429 + }, + { + "epoch": 0.5022306340408274, + "grad_norm": 0.3400786519050598, + "learning_rate": 1.5738795727370464e-05, + "loss": 0.0863494873046875, + "step": 7430 + }, + { + "epoch": 0.5022982290117616, + "grad_norm": 0.8053165674209595, + "learning_rate": 1.5735500183892966e-05, + "loss": 0.185821533203125, + "step": 7431 + }, + { + "epoch": 0.5023658239826957, + "grad_norm": 0.7879858016967773, + "learning_rate": 1.5732204604827395e-05, + "loss": 0.1889495849609375, + "step": 7432 + }, + { + "epoch": 0.5024334189536298, + "grad_norm": 0.9774047136306763, + "learning_rate": 1.572890899033322e-05, + "loss": 0.1440582275390625, + "step": 7433 + }, + { + "epoch": 0.502501013924564, + "grad_norm": 0.8091111183166504, + "learning_rate": 1.5725613340569888e-05, + "loss": 0.22821044921875, + "step": 7434 + }, + { + "epoch": 0.5025686088954981, + "grad_norm": 0.6971925497055054, + "learning_rate": 1.5722317655696865e-05, + "loss": 0.1382598876953125, + "step": 7435 + }, + { + "epoch": 0.5026362038664324, + "grad_norm": 0.5562623143196106, + "learning_rate": 1.5719021935873626e-05, + "loss": 0.135284423828125, + "step": 7436 + }, + { + "epoch": 0.5027037988373665, + "grad_norm": 0.37015512585639954, + "learning_rate": 1.5715726181259637e-05, + "loss": 0.0823516845703125, + "step": 7437 + }, + { + "epoch": 0.5027713938083007, + "grad_norm": 0.2413095384836197, + "learning_rate": 1.5712430392014364e-05, + "loss": 0.029298782348632812, + "step": 7438 + }, + { + "epoch": 0.5028389887792348, + "grad_norm": 0.2197572886943817, + "learning_rate": 1.5709134568297276e-05, + "loss": 0.052829742431640625, + "step": 7439 + }, + { + "epoch": 0.502906583750169, + "grad_norm": 0.7147995829582214, + "learning_rate": 1.5705838710267845e-05, + "loss": 0.1751251220703125, + "step": 7440 + }, + { + "epoch": 0.5029741787211032, + "grad_norm": 2.03722882270813, + "learning_rate": 1.5702542818085555e-05, + "loss": 0.15543365478515625, + "step": 7441 + }, + { + "epoch": 0.5030417736920373, + "grad_norm": 0.6312968134880066, + "learning_rate": 1.569924689190987e-05, + "loss": 0.09465408325195312, + "step": 7442 + }, + { + "epoch": 0.5031093686629715, + "grad_norm": 0.3775637447834015, + "learning_rate": 1.5695950931900268e-05, + "loss": 0.0690460205078125, + "step": 7443 + }, + { + "epoch": 0.5031769636339056, + "grad_norm": 0.8330435752868652, + "learning_rate": 1.5692654938216234e-05, + "loss": 0.19427490234375, + "step": 7444 + }, + { + "epoch": 0.5032445586048399, + "grad_norm": 0.5900450348854065, + "learning_rate": 1.568935891101725e-05, + "loss": 0.08592987060546875, + "step": 7445 + }, + { + "epoch": 0.503312153575774, + "grad_norm": 0.3132379651069641, + "learning_rate": 1.5686062850462792e-05, + "loss": 0.054965972900390625, + "step": 7446 + }, + { + "epoch": 0.5033797485467081, + "grad_norm": 1.1572884321212769, + "learning_rate": 1.568276675671235e-05, + "loss": 0.216796875, + "step": 7447 + }, + { + "epoch": 0.5034473435176423, + "grad_norm": 0.41922304034233093, + "learning_rate": 1.56794706299254e-05, + "loss": 0.1027069091796875, + "step": 7448 + }, + { + "epoch": 0.5035149384885764, + "grad_norm": 0.24610528349876404, + "learning_rate": 1.567617447026144e-05, + "loss": 0.05791473388671875, + "step": 7449 + }, + { + "epoch": 0.5035825334595107, + "grad_norm": 0.5125845074653625, + "learning_rate": 1.5672878277879956e-05, + "loss": 0.10567474365234375, + "step": 7450 + }, + { + "epoch": 0.5036501284304448, + "grad_norm": 0.4690602123737335, + "learning_rate": 1.566958205294043e-05, + "loss": 0.1000213623046875, + "step": 7451 + }, + { + "epoch": 0.503717723401379, + "grad_norm": 0.705198347568512, + "learning_rate": 1.566628579560237e-05, + "loss": 0.146697998046875, + "step": 7452 + }, + { + "epoch": 0.5037853183723131, + "grad_norm": 0.7657268047332764, + "learning_rate": 1.566298950602525e-05, + "loss": 0.1323394775390625, + "step": 7453 + }, + { + "epoch": 0.5038529133432472, + "grad_norm": 0.45203515887260437, + "learning_rate": 1.565969318436858e-05, + "loss": 0.0855560302734375, + "step": 7454 + }, + { + "epoch": 0.5039205083141814, + "grad_norm": 0.5228220224380493, + "learning_rate": 1.5656396830791847e-05, + "loss": 0.10112380981445312, + "step": 7455 + }, + { + "epoch": 0.5039881032851156, + "grad_norm": 0.5005627870559692, + "learning_rate": 1.5653100445454557e-05, + "loss": 0.1092681884765625, + "step": 7456 + }, + { + "epoch": 0.5040556982560498, + "grad_norm": 0.5396689176559448, + "learning_rate": 1.5649804028516202e-05, + "loss": 0.09801483154296875, + "step": 7457 + }, + { + "epoch": 0.5041232932269839, + "grad_norm": 0.4246930778026581, + "learning_rate": 1.564650758013629e-05, + "loss": 0.0958099365234375, + "step": 7458 + }, + { + "epoch": 0.5041908881979181, + "grad_norm": 0.7225816249847412, + "learning_rate": 1.564321110047432e-05, + "loss": 0.11551666259765625, + "step": 7459 + }, + { + "epoch": 0.5042584831688522, + "grad_norm": 0.2547176778316498, + "learning_rate": 1.5639914589689797e-05, + "loss": 0.03553009033203125, + "step": 7460 + }, + { + "epoch": 0.5043260781397864, + "grad_norm": 0.6669124960899353, + "learning_rate": 1.5636618047942225e-05, + "loss": 0.1194000244140625, + "step": 7461 + }, + { + "epoch": 0.5043936731107206, + "grad_norm": 1.1039034128189087, + "learning_rate": 1.5633321475391118e-05, + "loss": 0.1962890625, + "step": 7462 + }, + { + "epoch": 0.5044612680816547, + "grad_norm": 0.5646640658378601, + "learning_rate": 1.5630024872195975e-05, + "loss": 0.14168548583984375, + "step": 7463 + }, + { + "epoch": 0.5045288630525889, + "grad_norm": 0.2750588059425354, + "learning_rate": 1.5626728238516306e-05, + "loss": 0.032749176025390625, + "step": 7464 + }, + { + "epoch": 0.504596458023523, + "grad_norm": 0.2919958531856537, + "learning_rate": 1.5623431574511635e-05, + "loss": 0.0532989501953125, + "step": 7465 + }, + { + "epoch": 0.5046640529944572, + "grad_norm": 0.4000965356826782, + "learning_rate": 1.5620134880341464e-05, + "loss": 0.077667236328125, + "step": 7466 + }, + { + "epoch": 0.5047316479653914, + "grad_norm": 0.6547604203224182, + "learning_rate": 1.5616838156165316e-05, + "loss": 0.1371307373046875, + "step": 7467 + }, + { + "epoch": 0.5047992429363255, + "grad_norm": 0.7429783940315247, + "learning_rate": 1.5613541402142693e-05, + "loss": 0.13347244262695312, + "step": 7468 + }, + { + "epoch": 0.5048668379072597, + "grad_norm": 1.3716224431991577, + "learning_rate": 1.5610244618433126e-05, + "loss": 0.15966033935546875, + "step": 7469 + }, + { + "epoch": 0.5049344328781938, + "grad_norm": 1.1491752862930298, + "learning_rate": 1.560694780519613e-05, + "loss": 0.1896514892578125, + "step": 7470 + }, + { + "epoch": 0.5050020278491281, + "grad_norm": 0.3536852300167084, + "learning_rate": 1.5603650962591224e-05, + "loss": 0.0818939208984375, + "step": 7471 + }, + { + "epoch": 0.5050696228200622, + "grad_norm": 0.4749554693698883, + "learning_rate": 1.560035409077793e-05, + "loss": 0.09611129760742188, + "step": 7472 + }, + { + "epoch": 0.5051372177909963, + "grad_norm": 0.43199923634529114, + "learning_rate": 1.5597057189915774e-05, + "loss": 0.0973968505859375, + "step": 7473 + }, + { + "epoch": 0.5052048127619305, + "grad_norm": 0.5116462707519531, + "learning_rate": 1.5593760260164276e-05, + "loss": 0.13121795654296875, + "step": 7474 + }, + { + "epoch": 0.5052724077328646, + "grad_norm": 0.3239955008029938, + "learning_rate": 1.5590463301682962e-05, + "loss": 0.06755828857421875, + "step": 7475 + }, + { + "epoch": 0.5053400027037989, + "grad_norm": 0.3315654397010803, + "learning_rate": 1.5587166314631363e-05, + "loss": 0.07295989990234375, + "step": 7476 + }, + { + "epoch": 0.505407597674733, + "grad_norm": 0.5889526009559631, + "learning_rate": 1.5583869299169008e-05, + "loss": 0.1021270751953125, + "step": 7477 + }, + { + "epoch": 0.5054751926456672, + "grad_norm": 0.5867426991462708, + "learning_rate": 1.558057225545542e-05, + "loss": 0.11062431335449219, + "step": 7478 + }, + { + "epoch": 0.5055427876166013, + "grad_norm": 1.0342893600463867, + "learning_rate": 1.5577275183650142e-05, + "loss": 0.178131103515625, + "step": 7479 + }, + { + "epoch": 0.5056103825875354, + "grad_norm": 0.3797129690647125, + "learning_rate": 1.5573978083912698e-05, + "loss": 0.0649261474609375, + "step": 7480 + }, + { + "epoch": 0.5056779775584697, + "grad_norm": 0.4922051429748535, + "learning_rate": 1.5570680956402623e-05, + "loss": 0.10693359375, + "step": 7481 + }, + { + "epoch": 0.5057455725294038, + "grad_norm": 0.6450899243354797, + "learning_rate": 1.5567383801279456e-05, + "loss": 0.10869598388671875, + "step": 7482 + }, + { + "epoch": 0.505813167500338, + "grad_norm": 0.8311208486557007, + "learning_rate": 1.5564086618702735e-05, + "loss": 0.18780517578125, + "step": 7483 + }, + { + "epoch": 0.5058807624712721, + "grad_norm": 1.7647418975830078, + "learning_rate": 1.556078940883199e-05, + "loss": 0.27191162109375, + "step": 7484 + }, + { + "epoch": 0.5059483574422063, + "grad_norm": 0.4188990592956543, + "learning_rate": 1.5557492171826772e-05, + "loss": 0.09006500244140625, + "step": 7485 + }, + { + "epoch": 0.5060159524131405, + "grad_norm": 0.5168343186378479, + "learning_rate": 1.555419490784661e-05, + "loss": 0.12862396240234375, + "step": 7486 + }, + { + "epoch": 0.5060835473840746, + "grad_norm": 0.20805613696575165, + "learning_rate": 1.5550897617051056e-05, + "loss": 0.033527374267578125, + "step": 7487 + }, + { + "epoch": 0.5061511423550088, + "grad_norm": 0.8556937575340271, + "learning_rate": 1.554760029959965e-05, + "loss": 0.09280014038085938, + "step": 7488 + }, + { + "epoch": 0.5062187373259429, + "grad_norm": 0.742011547088623, + "learning_rate": 1.5544302955651928e-05, + "loss": 0.17840576171875, + "step": 7489 + }, + { + "epoch": 0.5062863322968771, + "grad_norm": 1.4948947429656982, + "learning_rate": 1.5541005585367454e-05, + "loss": 0.26617431640625, + "step": 7490 + }, + { + "epoch": 0.5063539272678113, + "grad_norm": 0.6360655426979065, + "learning_rate": 1.553770818890576e-05, + "loss": 0.152252197265625, + "step": 7491 + }, + { + "epoch": 0.5064215222387455, + "grad_norm": 0.7083542943000793, + "learning_rate": 1.5534410766426397e-05, + "loss": 0.137939453125, + "step": 7492 + }, + { + "epoch": 0.5064891172096796, + "grad_norm": 0.2254847139120102, + "learning_rate": 1.5531113318088922e-05, + "loss": 0.03757667541503906, + "step": 7493 + }, + { + "epoch": 0.5065567121806137, + "grad_norm": 0.9422470331192017, + "learning_rate": 1.552781584405288e-05, + "loss": 0.11799049377441406, + "step": 7494 + }, + { + "epoch": 0.5066243071515479, + "grad_norm": 1.1521307229995728, + "learning_rate": 1.5524518344477826e-05, + "loss": 0.20965576171875, + "step": 7495 + }, + { + "epoch": 0.506691902122482, + "grad_norm": 0.3451806604862213, + "learning_rate": 1.5521220819523314e-05, + "loss": 0.042205810546875, + "step": 7496 + }, + { + "epoch": 0.5067594970934163, + "grad_norm": 0.4854431748390198, + "learning_rate": 1.5517923269348897e-05, + "loss": 0.0848846435546875, + "step": 7497 + }, + { + "epoch": 0.5068270920643504, + "grad_norm": 0.6452891826629639, + "learning_rate": 1.551462569411413e-05, + "loss": 0.15625, + "step": 7498 + }, + { + "epoch": 0.5068946870352846, + "grad_norm": 0.9037753939628601, + "learning_rate": 1.551132809397857e-05, + "loss": 0.10267257690429688, + "step": 7499 + }, + { + "epoch": 0.5069622820062187, + "grad_norm": 0.8975244164466858, + "learning_rate": 1.550803046910178e-05, + "loss": 0.1589813232421875, + "step": 7500 + }, + { + "epoch": 0.5070298769771528, + "grad_norm": 0.15864740312099457, + "learning_rate": 1.5504732819643315e-05, + "loss": 0.028656005859375, + "step": 7501 + }, + { + "epoch": 0.5070974719480871, + "grad_norm": 0.49135079979896545, + "learning_rate": 1.5501435145762738e-05, + "loss": 0.06243896484375, + "step": 7502 + }, + { + "epoch": 0.5071650669190212, + "grad_norm": 0.2764626741409302, + "learning_rate": 1.5498137447619607e-05, + "loss": 0.048236846923828125, + "step": 7503 + }, + { + "epoch": 0.5072326618899554, + "grad_norm": 0.8941106200218201, + "learning_rate": 1.5494839725373493e-05, + "loss": 0.15045166015625, + "step": 7504 + }, + { + "epoch": 0.5073002568608895, + "grad_norm": 1.791637659072876, + "learning_rate": 1.5491541979183956e-05, + "loss": 0.153900146484375, + "step": 7505 + }, + { + "epoch": 0.5073678518318238, + "grad_norm": 0.9217272996902466, + "learning_rate": 1.5488244209210554e-05, + "loss": 0.185699462890625, + "step": 7506 + }, + { + "epoch": 0.5074354468027579, + "grad_norm": 0.5729960799217224, + "learning_rate": 1.5484946415612867e-05, + "loss": 0.07399749755859375, + "step": 7507 + }, + { + "epoch": 0.507503041773692, + "grad_norm": 0.35697922110557556, + "learning_rate": 1.5481648598550458e-05, + "loss": 0.0629730224609375, + "step": 7508 + }, + { + "epoch": 0.5075706367446262, + "grad_norm": 0.2943894863128662, + "learning_rate": 1.547835075818289e-05, + "loss": 0.0531463623046875, + "step": 7509 + }, + { + "epoch": 0.5076382317155603, + "grad_norm": 0.8972917199134827, + "learning_rate": 1.547505289466974e-05, + "loss": 0.1593475341796875, + "step": 7510 + }, + { + "epoch": 0.5077058266864946, + "grad_norm": 0.7731946110725403, + "learning_rate": 1.5471755008170575e-05, + "loss": 0.174957275390625, + "step": 7511 + }, + { + "epoch": 0.5077734216574287, + "grad_norm": 0.8431379199028015, + "learning_rate": 1.546845709884497e-05, + "loss": 0.211639404296875, + "step": 7512 + }, + { + "epoch": 0.5078410166283629, + "grad_norm": 0.7441878318786621, + "learning_rate": 1.5465159166852497e-05, + "loss": 0.185821533203125, + "step": 7513 + }, + { + "epoch": 0.507908611599297, + "grad_norm": 0.9871839880943298, + "learning_rate": 1.5461861212352727e-05, + "loss": 0.225860595703125, + "step": 7514 + }, + { + "epoch": 0.5079762065702311, + "grad_norm": 0.4354122281074524, + "learning_rate": 1.5458563235505246e-05, + "loss": 0.07889175415039062, + "step": 7515 + }, + { + "epoch": 0.5080438015411654, + "grad_norm": 0.9165873527526855, + "learning_rate": 1.5455265236469622e-05, + "loss": 0.2009124755859375, + "step": 7516 + }, + { + "epoch": 0.5081113965120995, + "grad_norm": 0.49113568663597107, + "learning_rate": 1.5451967215405432e-05, + "loss": 0.09217071533203125, + "step": 7517 + }, + { + "epoch": 0.5081789914830337, + "grad_norm": 0.6673751473426819, + "learning_rate": 1.544866917247226e-05, + "loss": 0.13788604736328125, + "step": 7518 + }, + { + "epoch": 0.5082465864539678, + "grad_norm": 0.3001113533973694, + "learning_rate": 1.544537110782968e-05, + "loss": 0.0546417236328125, + "step": 7519 + }, + { + "epoch": 0.508314181424902, + "grad_norm": 0.28554806113243103, + "learning_rate": 1.5442073021637282e-05, + "loss": 0.05551910400390625, + "step": 7520 + }, + { + "epoch": 0.5083817763958361, + "grad_norm": 0.6478225588798523, + "learning_rate": 1.543877491405464e-05, + "loss": 0.11220550537109375, + "step": 7521 + }, + { + "epoch": 0.5084493713667703, + "grad_norm": 0.2534574568271637, + "learning_rate": 1.5435476785241334e-05, + "loss": 0.0551910400390625, + "step": 7522 + }, + { + "epoch": 0.5085169663377045, + "grad_norm": 0.5754841566085815, + "learning_rate": 1.5432178635356955e-05, + "loss": 0.143798828125, + "step": 7523 + }, + { + "epoch": 0.5085845613086386, + "grad_norm": 0.5541411638259888, + "learning_rate": 1.5428880464561087e-05, + "loss": 0.129638671875, + "step": 7524 + }, + { + "epoch": 0.5086521562795728, + "grad_norm": 0.4811273515224457, + "learning_rate": 1.5425582273013316e-05, + "loss": 0.10202217102050781, + "step": 7525 + }, + { + "epoch": 0.508719751250507, + "grad_norm": 0.5575686693191528, + "learning_rate": 1.5422284060873228e-05, + "loss": 0.10763835906982422, + "step": 7526 + }, + { + "epoch": 0.5087873462214412, + "grad_norm": 0.41976094245910645, + "learning_rate": 1.541898582830041e-05, + "loss": 0.0828094482421875, + "step": 7527 + }, + { + "epoch": 0.5088549411923753, + "grad_norm": 0.3466111719608307, + "learning_rate": 1.541568757545445e-05, + "loss": 0.072662353515625, + "step": 7528 + }, + { + "epoch": 0.5089225361633094, + "grad_norm": 0.9787290692329407, + "learning_rate": 1.541238930249494e-05, + "loss": 0.1602783203125, + "step": 7529 + }, + { + "epoch": 0.5089901311342436, + "grad_norm": 0.34804633259773254, + "learning_rate": 1.5409091009581474e-05, + "loss": 0.066864013671875, + "step": 7530 + }, + { + "epoch": 0.5090577261051777, + "grad_norm": 0.265018492937088, + "learning_rate": 1.540579269687364e-05, + "loss": 0.05448150634765625, + "step": 7531 + }, + { + "epoch": 0.509125321076112, + "grad_norm": 0.7694851160049438, + "learning_rate": 1.540249436453103e-05, + "loss": 0.132720947265625, + "step": 7532 + }, + { + "epoch": 0.5091929160470461, + "grad_norm": 0.6944177150726318, + "learning_rate": 1.5399196012713245e-05, + "loss": 0.15814208984375, + "step": 7533 + }, + { + "epoch": 0.5092605110179803, + "grad_norm": 0.30489999055862427, + "learning_rate": 1.5395897641579866e-05, + "loss": 0.05499267578125, + "step": 7534 + }, + { + "epoch": 0.5093281059889144, + "grad_norm": 0.7535171508789062, + "learning_rate": 1.53925992512905e-05, + "loss": 0.159942626953125, + "step": 7535 + }, + { + "epoch": 0.5093957009598485, + "grad_norm": 0.6406918168067932, + "learning_rate": 1.538930084200474e-05, + "loss": 0.10375213623046875, + "step": 7536 + }, + { + "epoch": 0.5094632959307828, + "grad_norm": 2.019637107849121, + "learning_rate": 1.538600241388219e-05, + "loss": 0.2628936767578125, + "step": 7537 + }, + { + "epoch": 0.5095308909017169, + "grad_norm": 0.4369509518146515, + "learning_rate": 1.5382703967082438e-05, + "loss": 0.09036636352539062, + "step": 7538 + }, + { + "epoch": 0.5095984858726511, + "grad_norm": 0.5757777690887451, + "learning_rate": 1.5379405501765083e-05, + "loss": 0.12326240539550781, + "step": 7539 + }, + { + "epoch": 0.5096660808435852, + "grad_norm": 0.6141274571418762, + "learning_rate": 1.537610701808974e-05, + "loss": 0.1257781982421875, + "step": 7540 + }, + { + "epoch": 0.5097336758145194, + "grad_norm": 0.3394142985343933, + "learning_rate": 1.5372808516215998e-05, + "loss": 0.08458709716796875, + "step": 7541 + }, + { + "epoch": 0.5098012707854536, + "grad_norm": 0.43178918957710266, + "learning_rate": 1.536950999630346e-05, + "loss": 0.121673583984375, + "step": 7542 + }, + { + "epoch": 0.5098688657563877, + "grad_norm": 1.1895591020584106, + "learning_rate": 1.536621145851173e-05, + "loss": 0.232757568359375, + "step": 7543 + }, + { + "epoch": 0.5099364607273219, + "grad_norm": 1.1520925760269165, + "learning_rate": 1.5362912903000413e-05, + "loss": 0.1576385498046875, + "step": 7544 + }, + { + "epoch": 0.510004055698256, + "grad_norm": 0.3870786726474762, + "learning_rate": 1.5359614329929114e-05, + "loss": 0.0643310546875, + "step": 7545 + }, + { + "epoch": 0.5100716506691902, + "grad_norm": 0.5933229327201843, + "learning_rate": 1.5356315739457437e-05, + "loss": 0.07569122314453125, + "step": 7546 + }, + { + "epoch": 0.5101392456401244, + "grad_norm": 0.30056068301200867, + "learning_rate": 1.5353017131744987e-05, + "loss": 0.0560150146484375, + "step": 7547 + }, + { + "epoch": 0.5102068406110586, + "grad_norm": 0.3923758864402771, + "learning_rate": 1.5349718506951373e-05, + "loss": 0.06772613525390625, + "step": 7548 + }, + { + "epoch": 0.5102744355819927, + "grad_norm": 0.20546048879623413, + "learning_rate": 1.53464198652362e-05, + "loss": 0.04154777526855469, + "step": 7549 + }, + { + "epoch": 0.5103420305529268, + "grad_norm": 1.4106380939483643, + "learning_rate": 1.5343121206759085e-05, + "loss": 0.16717529296875, + "step": 7550 + }, + { + "epoch": 0.510409625523861, + "grad_norm": 0.6647523045539856, + "learning_rate": 1.5339822531679632e-05, + "loss": 0.08443450927734375, + "step": 7551 + }, + { + "epoch": 0.5104772204947952, + "grad_norm": 1.0398776531219482, + "learning_rate": 1.5336523840157446e-05, + "loss": 0.09661865234375, + "step": 7552 + }, + { + "epoch": 0.5105448154657294, + "grad_norm": 0.4996851086616516, + "learning_rate": 1.5333225132352143e-05, + "loss": 0.088592529296875, + "step": 7553 + }, + { + "epoch": 0.5106124104366635, + "grad_norm": 0.7386613488197327, + "learning_rate": 1.5329926408423344e-05, + "loss": 0.1486358642578125, + "step": 7554 + }, + { + "epoch": 0.5106800054075977, + "grad_norm": 0.6299281120300293, + "learning_rate": 1.532662766853065e-05, + "loss": 0.12482643127441406, + "step": 7555 + }, + { + "epoch": 0.5107476003785318, + "grad_norm": 0.9251658916473389, + "learning_rate": 1.5323328912833678e-05, + "loss": 0.1777191162109375, + "step": 7556 + }, + { + "epoch": 0.510815195349466, + "grad_norm": 0.8413209319114685, + "learning_rate": 1.532003014149204e-05, + "loss": 0.12253189086914062, + "step": 7557 + }, + { + "epoch": 0.5108827903204002, + "grad_norm": 0.4134519100189209, + "learning_rate": 1.5316731354665357e-05, + "loss": 0.07436561584472656, + "step": 7558 + }, + { + "epoch": 0.5109503852913343, + "grad_norm": 0.2794690430164337, + "learning_rate": 1.531343255251324e-05, + "loss": 0.02948760986328125, + "step": 7559 + }, + { + "epoch": 0.5110179802622685, + "grad_norm": 0.27622678875923157, + "learning_rate": 1.5310133735195306e-05, + "loss": 0.0482635498046875, + "step": 7560 + }, + { + "epoch": 0.5110855752332026, + "grad_norm": 0.32469964027404785, + "learning_rate": 1.5306834902871175e-05, + "loss": 0.0591583251953125, + "step": 7561 + }, + { + "epoch": 0.5111531702041369, + "grad_norm": 1.2211123704910278, + "learning_rate": 1.530353605570046e-05, + "loss": 0.1413421630859375, + "step": 7562 + }, + { + "epoch": 0.511220765175071, + "grad_norm": 0.17633067071437836, + "learning_rate": 1.5300237193842786e-05, + "loss": 0.030292510986328125, + "step": 7563 + }, + { + "epoch": 0.5112883601460051, + "grad_norm": 0.5185717344284058, + "learning_rate": 1.529693831745777e-05, + "loss": 0.10372161865234375, + "step": 7564 + }, + { + "epoch": 0.5113559551169393, + "grad_norm": 0.7340977191925049, + "learning_rate": 1.5293639426705026e-05, + "loss": 0.14170455932617188, + "step": 7565 + }, + { + "epoch": 0.5114235500878734, + "grad_norm": 0.30680498480796814, + "learning_rate": 1.529034052174419e-05, + "loss": 0.04547119140625, + "step": 7566 + }, + { + "epoch": 0.5114911450588077, + "grad_norm": 0.33709046244621277, + "learning_rate": 1.528704160273487e-05, + "loss": 0.0536956787109375, + "step": 7567 + }, + { + "epoch": 0.5115587400297418, + "grad_norm": 0.7262825965881348, + "learning_rate": 1.528374266983669e-05, + "loss": 0.118072509765625, + "step": 7568 + }, + { + "epoch": 0.511626335000676, + "grad_norm": 0.405687540769577, + "learning_rate": 1.528044372320927e-05, + "loss": 0.06511688232421875, + "step": 7569 + }, + { + "epoch": 0.5116939299716101, + "grad_norm": 0.7993893623352051, + "learning_rate": 1.5277144763012246e-05, + "loss": 0.1116485595703125, + "step": 7570 + }, + { + "epoch": 0.5117615249425442, + "grad_norm": 0.4313334822654724, + "learning_rate": 1.5273845789405236e-05, + "loss": 0.10547637939453125, + "step": 7571 + }, + { + "epoch": 0.5118291199134785, + "grad_norm": 0.8630416393280029, + "learning_rate": 1.5270546802547862e-05, + "loss": 0.168701171875, + "step": 7572 + }, + { + "epoch": 0.5118967148844126, + "grad_norm": 0.49923402070999146, + "learning_rate": 1.5267247802599744e-05, + "loss": 0.05498695373535156, + "step": 7573 + }, + { + "epoch": 0.5119643098553468, + "grad_norm": 0.28556397557258606, + "learning_rate": 1.5263948789720526e-05, + "loss": 0.05899810791015625, + "step": 7574 + }, + { + "epoch": 0.5120319048262809, + "grad_norm": 0.41940271854400635, + "learning_rate": 1.5260649764069816e-05, + "loss": 0.08328628540039062, + "step": 7575 + }, + { + "epoch": 0.5120994997972151, + "grad_norm": 0.798382043838501, + "learning_rate": 1.5257350725807256e-05, + "loss": 0.14847803115844727, + "step": 7576 + }, + { + "epoch": 0.5121670947681493, + "grad_norm": 0.28430086374282837, + "learning_rate": 1.5254051675092462e-05, + "loss": 0.06742095947265625, + "step": 7577 + }, + { + "epoch": 0.5122346897390834, + "grad_norm": 1.4028801918029785, + "learning_rate": 1.525075261208507e-05, + "loss": 0.215606689453125, + "step": 7578 + }, + { + "epoch": 0.5123022847100176, + "grad_norm": 0.8823656439781189, + "learning_rate": 1.524745353694471e-05, + "loss": 0.12198638916015625, + "step": 7579 + }, + { + "epoch": 0.5123698796809517, + "grad_norm": 0.4641636312007904, + "learning_rate": 1.5244154449831009e-05, + "loss": 0.079864501953125, + "step": 7580 + }, + { + "epoch": 0.5124374746518859, + "grad_norm": 0.25961241126060486, + "learning_rate": 1.524085535090359e-05, + "loss": 0.04505157470703125, + "step": 7581 + }, + { + "epoch": 0.51250506962282, + "grad_norm": 0.5792375206947327, + "learning_rate": 1.5237556240322096e-05, + "loss": 0.1045989990234375, + "step": 7582 + }, + { + "epoch": 0.5125726645937543, + "grad_norm": 0.8830071687698364, + "learning_rate": 1.5234257118246155e-05, + "loss": 0.14003753662109375, + "step": 7583 + }, + { + "epoch": 0.5126402595646884, + "grad_norm": 0.5569111704826355, + "learning_rate": 1.5230957984835397e-05, + "loss": 0.089630126953125, + "step": 7584 + }, + { + "epoch": 0.5127078545356225, + "grad_norm": 0.4341450035572052, + "learning_rate": 1.5227658840249452e-05, + "loss": 0.0916595458984375, + "step": 7585 + }, + { + "epoch": 0.5127754495065567, + "grad_norm": 0.6138651967048645, + "learning_rate": 1.5224359684647962e-05, + "loss": 0.09918975830078125, + "step": 7586 + }, + { + "epoch": 0.5128430444774909, + "grad_norm": 0.3438534140586853, + "learning_rate": 1.5221060518190552e-05, + "loss": 0.05113983154296875, + "step": 7587 + }, + { + "epoch": 0.5129106394484251, + "grad_norm": 0.4674086272716522, + "learning_rate": 1.521776134103686e-05, + "loss": 0.084930419921875, + "step": 7588 + }, + { + "epoch": 0.5129782344193592, + "grad_norm": 0.19113120436668396, + "learning_rate": 1.5214462153346522e-05, + "loss": 0.041927337646484375, + "step": 7589 + }, + { + "epoch": 0.5130458293902934, + "grad_norm": 0.40616166591644287, + "learning_rate": 1.5211162955279167e-05, + "loss": 0.07277679443359375, + "step": 7590 + }, + { + "epoch": 0.5131134243612275, + "grad_norm": 1.09455144405365, + "learning_rate": 1.5207863746994439e-05, + "loss": 0.16016387939453125, + "step": 7591 + }, + { + "epoch": 0.5131810193321616, + "grad_norm": 1.115281343460083, + "learning_rate": 1.520456452865197e-05, + "loss": 0.18837738037109375, + "step": 7592 + }, + { + "epoch": 0.5132486143030959, + "grad_norm": 0.30164194107055664, + "learning_rate": 1.5201265300411394e-05, + "loss": 0.0624237060546875, + "step": 7593 + }, + { + "epoch": 0.51331620927403, + "grad_norm": 0.2644381821155548, + "learning_rate": 1.5197966062432352e-05, + "loss": 0.05440521240234375, + "step": 7594 + }, + { + "epoch": 0.5133838042449642, + "grad_norm": 0.4152495563030243, + "learning_rate": 1.5194666814874483e-05, + "loss": 0.05611419677734375, + "step": 7595 + }, + { + "epoch": 0.5134513992158983, + "grad_norm": 1.330315113067627, + "learning_rate": 1.519136755789742e-05, + "loss": 0.23583984375, + "step": 7596 + }, + { + "epoch": 0.5135189941868324, + "grad_norm": 0.2641991078853607, + "learning_rate": 1.5188068291660807e-05, + "loss": 0.0482635498046875, + "step": 7597 + }, + { + "epoch": 0.5135865891577667, + "grad_norm": 0.5578904151916504, + "learning_rate": 1.5184769016324277e-05, + "loss": 0.10759735107421875, + "step": 7598 + }, + { + "epoch": 0.5136541841287008, + "grad_norm": 0.6825937628746033, + "learning_rate": 1.5181469732047476e-05, + "loss": 0.139312744140625, + "step": 7599 + }, + { + "epoch": 0.513721779099635, + "grad_norm": 0.3070925176143646, + "learning_rate": 1.5178170438990039e-05, + "loss": 0.07094955444335938, + "step": 7600 + }, + { + "epoch": 0.5137893740705691, + "grad_norm": 0.36069220304489136, + "learning_rate": 1.5174871137311612e-05, + "loss": 0.072906494140625, + "step": 7601 + }, + { + "epoch": 0.5138569690415034, + "grad_norm": 0.34905749559402466, + "learning_rate": 1.5171571827171828e-05, + "loss": 0.06683349609375, + "step": 7602 + }, + { + "epoch": 0.5139245640124375, + "grad_norm": 0.21254447102546692, + "learning_rate": 1.5168272508730333e-05, + "loss": 0.035167694091796875, + "step": 7603 + }, + { + "epoch": 0.5139921589833716, + "grad_norm": 0.7381553053855896, + "learning_rate": 1.5164973182146767e-05, + "loss": 0.144805908203125, + "step": 7604 + }, + { + "epoch": 0.5140597539543058, + "grad_norm": 1.0182751417160034, + "learning_rate": 1.5161673847580773e-05, + "loss": 0.1572265625, + "step": 7605 + }, + { + "epoch": 0.5141273489252399, + "grad_norm": 0.5590996742248535, + "learning_rate": 1.515837450519199e-05, + "loss": 0.12093353271484375, + "step": 7606 + }, + { + "epoch": 0.5141949438961742, + "grad_norm": 0.23378562927246094, + "learning_rate": 1.5155075155140068e-05, + "loss": 0.04310798645019531, + "step": 7607 + }, + { + "epoch": 0.5142625388671083, + "grad_norm": 0.7112851142883301, + "learning_rate": 1.5151775797584644e-05, + "loss": 0.1265106201171875, + "step": 7608 + }, + { + "epoch": 0.5143301338380425, + "grad_norm": 0.5600508451461792, + "learning_rate": 1.5148476432685365e-05, + "loss": 0.1226348876953125, + "step": 7609 + }, + { + "epoch": 0.5143977288089766, + "grad_norm": 0.46760696172714233, + "learning_rate": 1.514517706060187e-05, + "loss": 0.1093597412109375, + "step": 7610 + }, + { + "epoch": 0.5144653237799107, + "grad_norm": 0.7886398434638977, + "learning_rate": 1.5141877681493804e-05, + "loss": 0.14236831665039062, + "step": 7611 + }, + { + "epoch": 0.514532918750845, + "grad_norm": 0.613448441028595, + "learning_rate": 1.513857829552082e-05, + "loss": 0.10588455200195312, + "step": 7612 + }, + { + "epoch": 0.5146005137217791, + "grad_norm": 0.7413811683654785, + "learning_rate": 1.513527890284255e-05, + "loss": 0.15225982666015625, + "step": 7613 + }, + { + "epoch": 0.5146681086927133, + "grad_norm": 0.5627806186676025, + "learning_rate": 1.513197950361865e-05, + "loss": 0.1341094970703125, + "step": 7614 + }, + { + "epoch": 0.5147357036636474, + "grad_norm": 0.18000482022762299, + "learning_rate": 1.512868009800876e-05, + "loss": 0.023555755615234375, + "step": 7615 + }, + { + "epoch": 0.5148032986345816, + "grad_norm": 0.2257617563009262, + "learning_rate": 1.5125380686172525e-05, + "loss": 0.034435272216796875, + "step": 7616 + }, + { + "epoch": 0.5148708936055157, + "grad_norm": 0.3670973479747772, + "learning_rate": 1.5122081268269593e-05, + "loss": 0.080078125, + "step": 7617 + }, + { + "epoch": 0.5149384885764499, + "grad_norm": 0.6529722809791565, + "learning_rate": 1.5118781844459612e-05, + "loss": 0.08817291259765625, + "step": 7618 + }, + { + "epoch": 0.5150060835473841, + "grad_norm": 0.3313761353492737, + "learning_rate": 1.511548241490222e-05, + "loss": 0.09140777587890625, + "step": 7619 + }, + { + "epoch": 0.5150736785183182, + "grad_norm": 0.3660167157649994, + "learning_rate": 1.5112182979757074e-05, + "loss": 0.05944061279296875, + "step": 7620 + }, + { + "epoch": 0.5151412734892524, + "grad_norm": 0.8749839067459106, + "learning_rate": 1.510888353918382e-05, + "loss": 0.171234130859375, + "step": 7621 + }, + { + "epoch": 0.5152088684601865, + "grad_norm": 0.29855427145957947, + "learning_rate": 1.5105584093342105e-05, + "loss": 0.03122711181640625, + "step": 7622 + }, + { + "epoch": 0.5152764634311208, + "grad_norm": 1.271996259689331, + "learning_rate": 1.510228464239157e-05, + "loss": 0.21185302734375, + "step": 7623 + }, + { + "epoch": 0.5153440584020549, + "grad_norm": 0.6792632341384888, + "learning_rate": 1.5098985186491869e-05, + "loss": 0.104644775390625, + "step": 7624 + }, + { + "epoch": 0.515411653372989, + "grad_norm": 0.7669714689254761, + "learning_rate": 1.5095685725802653e-05, + "loss": 0.12371826171875, + "step": 7625 + }, + { + "epoch": 0.5154792483439232, + "grad_norm": 0.2643773853778839, + "learning_rate": 1.5092386260483565e-05, + "loss": 0.0386810302734375, + "step": 7626 + }, + { + "epoch": 0.5155468433148573, + "grad_norm": 0.24787674844264984, + "learning_rate": 1.5089086790694253e-05, + "loss": 0.04062652587890625, + "step": 7627 + }, + { + "epoch": 0.5156144382857916, + "grad_norm": 0.4223792850971222, + "learning_rate": 1.5085787316594368e-05, + "loss": 0.08087158203125, + "step": 7628 + }, + { + "epoch": 0.5156820332567257, + "grad_norm": 0.942592978477478, + "learning_rate": 1.508248783834356e-05, + "loss": 0.1059417724609375, + "step": 7629 + }, + { + "epoch": 0.5157496282276599, + "grad_norm": 0.5381718873977661, + "learning_rate": 1.5079188356101477e-05, + "loss": 0.1494293212890625, + "step": 7630 + }, + { + "epoch": 0.515817223198594, + "grad_norm": 0.4762164056301117, + "learning_rate": 1.5075888870027767e-05, + "loss": 0.0958099365234375, + "step": 7631 + }, + { + "epoch": 0.5158848181695281, + "grad_norm": 0.9019644856452942, + "learning_rate": 1.5072589380282084e-05, + "loss": 0.13865280151367188, + "step": 7632 + }, + { + "epoch": 0.5159524131404624, + "grad_norm": 0.3500880300998688, + "learning_rate": 1.5069289887024078e-05, + "loss": 0.05964088439941406, + "step": 7633 + }, + { + "epoch": 0.5160200081113965, + "grad_norm": 0.9910226464271545, + "learning_rate": 1.5065990390413393e-05, + "loss": 0.20086669921875, + "step": 7634 + }, + { + "epoch": 0.5160876030823307, + "grad_norm": 0.2637898921966553, + "learning_rate": 1.5062690890609682e-05, + "loss": 0.03926849365234375, + "step": 7635 + }, + { + "epoch": 0.5161551980532648, + "grad_norm": 0.5454452037811279, + "learning_rate": 1.5059391387772595e-05, + "loss": 0.128021240234375, + "step": 7636 + }, + { + "epoch": 0.516222793024199, + "grad_norm": 0.7347771525382996, + "learning_rate": 1.505609188206179e-05, + "loss": 0.1278839111328125, + "step": 7637 + }, + { + "epoch": 0.5162903879951332, + "grad_norm": 0.6004359126091003, + "learning_rate": 1.5052792373636905e-05, + "loss": 0.13262939453125, + "step": 7638 + }, + { + "epoch": 0.5163579829660673, + "grad_norm": 0.3324752151966095, + "learning_rate": 1.5049492862657602e-05, + "loss": 0.06603240966796875, + "step": 7639 + }, + { + "epoch": 0.5164255779370015, + "grad_norm": 1.1491647958755493, + "learning_rate": 1.5046193349283518e-05, + "loss": 0.142913818359375, + "step": 7640 + }, + { + "epoch": 0.5164931729079356, + "grad_norm": 0.5429760813713074, + "learning_rate": 1.5042893833674318e-05, + "loss": 0.114349365234375, + "step": 7641 + }, + { + "epoch": 0.5165607678788698, + "grad_norm": 0.9184871315956116, + "learning_rate": 1.5039594315989645e-05, + "loss": 0.15692901611328125, + "step": 7642 + }, + { + "epoch": 0.516628362849804, + "grad_norm": 0.6411252021789551, + "learning_rate": 1.5036294796389156e-05, + "loss": 0.10943603515625, + "step": 7643 + }, + { + "epoch": 0.5166959578207382, + "grad_norm": 0.36787930130958557, + "learning_rate": 1.5032995275032492e-05, + "loss": 0.08412933349609375, + "step": 7644 + }, + { + "epoch": 0.5167635527916723, + "grad_norm": 0.3069547116756439, + "learning_rate": 1.502969575207932e-05, + "loss": 0.05196380615234375, + "step": 7645 + }, + { + "epoch": 0.5168311477626064, + "grad_norm": 0.6394796967506409, + "learning_rate": 1.502639622768928e-05, + "loss": 0.12347412109375, + "step": 7646 + }, + { + "epoch": 0.5168987427335406, + "grad_norm": 0.67860347032547, + "learning_rate": 1.5023096702022024e-05, + "loss": 0.1141510009765625, + "step": 7647 + }, + { + "epoch": 0.5169663377044748, + "grad_norm": 0.2819996476173401, + "learning_rate": 1.501979717523721e-05, + "loss": 0.05199432373046875, + "step": 7648 + }, + { + "epoch": 0.517033932675409, + "grad_norm": 0.45952606201171875, + "learning_rate": 1.5016497647494481e-05, + "loss": 0.08475494384765625, + "step": 7649 + }, + { + "epoch": 0.5171015276463431, + "grad_norm": 0.5337870121002197, + "learning_rate": 1.5013198118953499e-05, + "loss": 0.1262664794921875, + "step": 7650 + }, + { + "epoch": 0.5171691226172773, + "grad_norm": 1.0533164739608765, + "learning_rate": 1.5009898589773906e-05, + "loss": 0.197967529296875, + "step": 7651 + }, + { + "epoch": 0.5172367175882114, + "grad_norm": 0.7943223714828491, + "learning_rate": 1.5006599060115353e-05, + "loss": 0.1396484375, + "step": 7652 + }, + { + "epoch": 0.5173043125591456, + "grad_norm": 0.7027508616447449, + "learning_rate": 1.5003299530137503e-05, + "loss": 0.11108207702636719, + "step": 7653 + }, + { + "epoch": 0.5173719075300798, + "grad_norm": 0.5392756462097168, + "learning_rate": 1.5e-05, + "loss": 0.13189697265625, + "step": 7654 + }, + { + "epoch": 0.5174395025010139, + "grad_norm": 0.2125377207994461, + "learning_rate": 1.4996700469862503e-05, + "loss": 0.02622509002685547, + "step": 7655 + }, + { + "epoch": 0.5175070974719481, + "grad_norm": 0.32986193895339966, + "learning_rate": 1.4993400939884648e-05, + "loss": 0.0815277099609375, + "step": 7656 + }, + { + "epoch": 0.5175746924428822, + "grad_norm": 0.4847750961780548, + "learning_rate": 1.49901014102261e-05, + "loss": 0.09783935546875, + "step": 7657 + }, + { + "epoch": 0.5176422874138165, + "grad_norm": 0.6961401104927063, + "learning_rate": 1.4986801881046504e-05, + "loss": 0.12328338623046875, + "step": 7658 + }, + { + "epoch": 0.5177098823847506, + "grad_norm": 0.5276845693588257, + "learning_rate": 1.4983502352505523e-05, + "loss": 0.14023590087890625, + "step": 7659 + }, + { + "epoch": 0.5177774773556847, + "grad_norm": 0.31577080488204956, + "learning_rate": 1.4980202824762792e-05, + "loss": 0.051311492919921875, + "step": 7660 + }, + { + "epoch": 0.5178450723266189, + "grad_norm": 0.6788255572319031, + "learning_rate": 1.4976903297977978e-05, + "loss": 0.1773529052734375, + "step": 7661 + }, + { + "epoch": 0.517912667297553, + "grad_norm": 0.4378986954689026, + "learning_rate": 1.4973603772310721e-05, + "loss": 0.06308746337890625, + "step": 7662 + }, + { + "epoch": 0.5179802622684873, + "grad_norm": 0.6528909802436829, + "learning_rate": 1.4970304247920684e-05, + "loss": 0.11654853820800781, + "step": 7663 + }, + { + "epoch": 0.5180478572394214, + "grad_norm": 0.2905597984790802, + "learning_rate": 1.4967004724967505e-05, + "loss": 0.042919158935546875, + "step": 7664 + }, + { + "epoch": 0.5181154522103556, + "grad_norm": 0.8668391108512878, + "learning_rate": 1.4963705203610846e-05, + "loss": 0.1583404541015625, + "step": 7665 + }, + { + "epoch": 0.5181830471812897, + "grad_norm": 0.4615239202976227, + "learning_rate": 1.4960405684010359e-05, + "loss": 0.10186004638671875, + "step": 7666 + }, + { + "epoch": 0.5182506421522238, + "grad_norm": 0.2484191507101059, + "learning_rate": 1.4957106166325686e-05, + "loss": 0.049224853515625, + "step": 7667 + }, + { + "epoch": 0.518318237123158, + "grad_norm": 0.5950817465782166, + "learning_rate": 1.4953806650716486e-05, + "loss": 0.156341552734375, + "step": 7668 + }, + { + "epoch": 0.5183858320940922, + "grad_norm": 0.8227689266204834, + "learning_rate": 1.4950507137342402e-05, + "loss": 0.18145751953125, + "step": 7669 + }, + { + "epoch": 0.5184534270650264, + "grad_norm": 0.4843420684337616, + "learning_rate": 1.4947207626363098e-05, + "loss": 0.061431884765625, + "step": 7670 + }, + { + "epoch": 0.5185210220359605, + "grad_norm": 0.7475500702857971, + "learning_rate": 1.4943908117938213e-05, + "loss": 0.11168289184570312, + "step": 7671 + }, + { + "epoch": 0.5185886170068947, + "grad_norm": 0.39825284481048584, + "learning_rate": 1.4940608612227407e-05, + "loss": 0.06077766418457031, + "step": 7672 + }, + { + "epoch": 0.5186562119778289, + "grad_norm": 0.9261006116867065, + "learning_rate": 1.4937309109390319e-05, + "loss": 0.2161865234375, + "step": 7673 + }, + { + "epoch": 0.518723806948763, + "grad_norm": 0.6327972412109375, + "learning_rate": 1.493400960958661e-05, + "loss": 0.12031173706054688, + "step": 7674 + }, + { + "epoch": 0.5187914019196972, + "grad_norm": 0.5792518854141235, + "learning_rate": 1.4930710112975923e-05, + "loss": 0.11255645751953125, + "step": 7675 + }, + { + "epoch": 0.5188589968906313, + "grad_norm": 1.2616451978683472, + "learning_rate": 1.4927410619717918e-05, + "loss": 0.25, + "step": 7676 + }, + { + "epoch": 0.5189265918615655, + "grad_norm": 0.5237900018692017, + "learning_rate": 1.4924111129972232e-05, + "loss": 0.14125442504882812, + "step": 7677 + }, + { + "epoch": 0.5189941868324996, + "grad_norm": 0.9029098749160767, + "learning_rate": 1.4920811643898524e-05, + "loss": 0.10474395751953125, + "step": 7678 + }, + { + "epoch": 0.5190617818034339, + "grad_norm": 0.5344755053520203, + "learning_rate": 1.4917512161656445e-05, + "loss": 0.090972900390625, + "step": 7679 + }, + { + "epoch": 0.519129376774368, + "grad_norm": 0.5958898067474365, + "learning_rate": 1.4914212683405635e-05, + "loss": 0.10326766967773438, + "step": 7680 + }, + { + "epoch": 0.5191969717453021, + "grad_norm": 0.9594732522964478, + "learning_rate": 1.491091320930575e-05, + "loss": 0.15776824951171875, + "step": 7681 + }, + { + "epoch": 0.5192645667162363, + "grad_norm": 0.4521099627017975, + "learning_rate": 1.4907613739516438e-05, + "loss": 0.08322906494140625, + "step": 7682 + }, + { + "epoch": 0.5193321616871704, + "grad_norm": 0.2118271291255951, + "learning_rate": 1.4904314274197352e-05, + "loss": 0.04470062255859375, + "step": 7683 + }, + { + "epoch": 0.5193997566581047, + "grad_norm": 0.3090468645095825, + "learning_rate": 1.490101481350813e-05, + "loss": 0.0552978515625, + "step": 7684 + }, + { + "epoch": 0.5194673516290388, + "grad_norm": 0.3520105183124542, + "learning_rate": 1.4897715357608434e-05, + "loss": 0.033985137939453125, + "step": 7685 + }, + { + "epoch": 0.519534946599973, + "grad_norm": 0.6199215054512024, + "learning_rate": 1.4894415906657899e-05, + "loss": 0.10056304931640625, + "step": 7686 + }, + { + "epoch": 0.5196025415709071, + "grad_norm": 0.6126744747161865, + "learning_rate": 1.4891116460816182e-05, + "loss": 0.1332244873046875, + "step": 7687 + }, + { + "epoch": 0.5196701365418412, + "grad_norm": 0.6257967352867126, + "learning_rate": 1.4887817020242923e-05, + "loss": 0.166839599609375, + "step": 7688 + }, + { + "epoch": 0.5197377315127755, + "grad_norm": 0.29839882254600525, + "learning_rate": 1.4884517585097782e-05, + "loss": 0.056903839111328125, + "step": 7689 + }, + { + "epoch": 0.5198053264837096, + "grad_norm": 0.33983349800109863, + "learning_rate": 1.4881218155540396e-05, + "loss": 0.0662689208984375, + "step": 7690 + }, + { + "epoch": 0.5198729214546438, + "grad_norm": 0.7948500514030457, + "learning_rate": 1.487791873173041e-05, + "loss": 0.10297393798828125, + "step": 7691 + }, + { + "epoch": 0.5199405164255779, + "grad_norm": 0.7610653638839722, + "learning_rate": 1.4874619313827483e-05, + "loss": 0.143096923828125, + "step": 7692 + }, + { + "epoch": 0.5200081113965122, + "grad_norm": 0.654682457447052, + "learning_rate": 1.4871319901991245e-05, + "loss": 0.097503662109375, + "step": 7693 + }, + { + "epoch": 0.5200757063674463, + "grad_norm": 0.340091347694397, + "learning_rate": 1.4868020496381353e-05, + "loss": 0.07685089111328125, + "step": 7694 + }, + { + "epoch": 0.5201433013383804, + "grad_norm": 0.6651535034179688, + "learning_rate": 1.486472109715745e-05, + "loss": 0.08205223083496094, + "step": 7695 + }, + { + "epoch": 0.5202108963093146, + "grad_norm": 0.8437504172325134, + "learning_rate": 1.4861421704479185e-05, + "loss": 0.1486663818359375, + "step": 7696 + }, + { + "epoch": 0.5202784912802487, + "grad_norm": 1.3321983814239502, + "learning_rate": 1.4858122318506195e-05, + "loss": 0.207061767578125, + "step": 7697 + }, + { + "epoch": 0.520346086251183, + "grad_norm": 0.44716590642929077, + "learning_rate": 1.4854822939398132e-05, + "loss": 0.0788116455078125, + "step": 7698 + }, + { + "epoch": 0.5204136812221171, + "grad_norm": 0.4129875898361206, + "learning_rate": 1.4851523567314634e-05, + "loss": 0.1078948974609375, + "step": 7699 + }, + { + "epoch": 0.5204812761930513, + "grad_norm": 0.2181268185377121, + "learning_rate": 1.4848224202415357e-05, + "loss": 0.026384353637695312, + "step": 7700 + }, + { + "epoch": 0.5205488711639854, + "grad_norm": 1.1832809448242188, + "learning_rate": 1.4844924844859938e-05, + "loss": 0.188507080078125, + "step": 7701 + }, + { + "epoch": 0.5206164661349195, + "grad_norm": 0.2227616310119629, + "learning_rate": 1.4841625494808012e-05, + "loss": 0.05098152160644531, + "step": 7702 + }, + { + "epoch": 0.5206840611058537, + "grad_norm": 0.387546569108963, + "learning_rate": 1.4838326152419231e-05, + "loss": 0.07243728637695312, + "step": 7703 + }, + { + "epoch": 0.5207516560767879, + "grad_norm": 0.8042672276496887, + "learning_rate": 1.4835026817853235e-05, + "loss": 0.152984619140625, + "step": 7704 + }, + { + "epoch": 0.5208192510477221, + "grad_norm": 0.9374739527702332, + "learning_rate": 1.4831727491269673e-05, + "loss": 0.1443328857421875, + "step": 7705 + }, + { + "epoch": 0.5208868460186562, + "grad_norm": 0.18181714415550232, + "learning_rate": 1.4828428172828176e-05, + "loss": 0.03704833984375, + "step": 7706 + }, + { + "epoch": 0.5209544409895904, + "grad_norm": 0.1999843567609787, + "learning_rate": 1.4825128862688393e-05, + "loss": 0.032642364501953125, + "step": 7707 + }, + { + "epoch": 0.5210220359605245, + "grad_norm": 0.4692314565181732, + "learning_rate": 1.482182956100996e-05, + "loss": 0.0938262939453125, + "step": 7708 + }, + { + "epoch": 0.5210896309314587, + "grad_norm": 0.33428284525871277, + "learning_rate": 1.4818530267952528e-05, + "loss": 0.06542205810546875, + "step": 7709 + }, + { + "epoch": 0.5211572259023929, + "grad_norm": 0.3990718126296997, + "learning_rate": 1.4815230983675722e-05, + "loss": 0.06394195556640625, + "step": 7710 + }, + { + "epoch": 0.521224820873327, + "grad_norm": 0.7939885854721069, + "learning_rate": 1.4811931708339196e-05, + "loss": 0.172882080078125, + "step": 7711 + }, + { + "epoch": 0.5212924158442612, + "grad_norm": 0.34733065962791443, + "learning_rate": 1.4808632442102579e-05, + "loss": 0.08452606201171875, + "step": 7712 + }, + { + "epoch": 0.5213600108151953, + "grad_norm": 0.7406485676765442, + "learning_rate": 1.4805333185125521e-05, + "loss": 0.10868072509765625, + "step": 7713 + }, + { + "epoch": 0.5214276057861296, + "grad_norm": 0.38178789615631104, + "learning_rate": 1.4802033937567652e-05, + "loss": 0.0705718994140625, + "step": 7714 + }, + { + "epoch": 0.5214952007570637, + "grad_norm": 0.26137611269950867, + "learning_rate": 1.4798734699588607e-05, + "loss": 0.04985809326171875, + "step": 7715 + }, + { + "epoch": 0.5215627957279978, + "grad_norm": 0.6928391456604004, + "learning_rate": 1.4795435471348036e-05, + "loss": 0.1420135498046875, + "step": 7716 + }, + { + "epoch": 0.521630390698932, + "grad_norm": 0.35133302211761475, + "learning_rate": 1.4792136253005564e-05, + "loss": 0.09383392333984375, + "step": 7717 + }, + { + "epoch": 0.5216979856698661, + "grad_norm": 0.7723799347877502, + "learning_rate": 1.4788837044720837e-05, + "loss": 0.146453857421875, + "step": 7718 + }, + { + "epoch": 0.5217655806408004, + "grad_norm": 0.5908478498458862, + "learning_rate": 1.4785537846653482e-05, + "loss": 0.09649658203125, + "step": 7719 + }, + { + "epoch": 0.5218331756117345, + "grad_norm": 0.47642961144447327, + "learning_rate": 1.4782238658963143e-05, + "loss": 0.09515380859375, + "step": 7720 + }, + { + "epoch": 0.5219007705826687, + "grad_norm": 0.8296915888786316, + "learning_rate": 1.4778939481809447e-05, + "loss": 0.1708984375, + "step": 7721 + }, + { + "epoch": 0.5219683655536028, + "grad_norm": 0.32787656784057617, + "learning_rate": 1.4775640315352042e-05, + "loss": 0.04562187194824219, + "step": 7722 + }, + { + "epoch": 0.5220359605245369, + "grad_norm": 1.557283639907837, + "learning_rate": 1.4772341159750547e-05, + "loss": 0.221160888671875, + "step": 7723 + }, + { + "epoch": 0.5221035554954712, + "grad_norm": 0.4229508638381958, + "learning_rate": 1.4769042015164606e-05, + "loss": 0.096099853515625, + "step": 7724 + }, + { + "epoch": 0.5221711504664053, + "grad_norm": 0.9951568841934204, + "learning_rate": 1.476574288175385e-05, + "loss": 0.223297119140625, + "step": 7725 + }, + { + "epoch": 0.5222387454373395, + "grad_norm": 0.7298922538757324, + "learning_rate": 1.4762443759677906e-05, + "loss": 0.1413421630859375, + "step": 7726 + }, + { + "epoch": 0.5223063404082736, + "grad_norm": 0.4222978353500366, + "learning_rate": 1.4759144649096412e-05, + "loss": 0.1101531982421875, + "step": 7727 + }, + { + "epoch": 0.5223739353792077, + "grad_norm": 0.29442694783210754, + "learning_rate": 1.4755845550168995e-05, + "loss": 0.03369140625, + "step": 7728 + }, + { + "epoch": 0.522441530350142, + "grad_norm": 0.7846909761428833, + "learning_rate": 1.4752546463055296e-05, + "loss": 0.14250946044921875, + "step": 7729 + }, + { + "epoch": 0.5225091253210761, + "grad_norm": 0.40148863196372986, + "learning_rate": 1.474924738791493e-05, + "loss": 0.08089447021484375, + "step": 7730 + }, + { + "epoch": 0.5225767202920103, + "grad_norm": 0.34181177616119385, + "learning_rate": 1.4745948324907543e-05, + "loss": 0.0548248291015625, + "step": 7731 + }, + { + "epoch": 0.5226443152629444, + "grad_norm": 0.3611587882041931, + "learning_rate": 1.4742649274192748e-05, + "loss": 0.0673980712890625, + "step": 7732 + }, + { + "epoch": 0.5227119102338786, + "grad_norm": 0.549481213092804, + "learning_rate": 1.4739350235930185e-05, + "loss": 0.1251373291015625, + "step": 7733 + }, + { + "epoch": 0.5227795052048128, + "grad_norm": 1.1539080142974854, + "learning_rate": 1.4736051210279477e-05, + "loss": 0.174835205078125, + "step": 7734 + }, + { + "epoch": 0.5228471001757469, + "grad_norm": 0.24600201845169067, + "learning_rate": 1.4732752197400257e-05, + "loss": 0.04933929443359375, + "step": 7735 + }, + { + "epoch": 0.5229146951466811, + "grad_norm": 0.745082676410675, + "learning_rate": 1.4729453197452147e-05, + "loss": 0.1421356201171875, + "step": 7736 + }, + { + "epoch": 0.5229822901176152, + "grad_norm": 0.9510912299156189, + "learning_rate": 1.4726154210594768e-05, + "loss": 0.186767578125, + "step": 7737 + }, + { + "epoch": 0.5230498850885494, + "grad_norm": 0.3275867700576782, + "learning_rate": 1.4722855236987758e-05, + "loss": 0.048858642578125, + "step": 7738 + }, + { + "epoch": 0.5231174800594836, + "grad_norm": 0.5148094296455383, + "learning_rate": 1.471955627679073e-05, + "loss": 0.1025848388671875, + "step": 7739 + }, + { + "epoch": 0.5231850750304178, + "grad_norm": 0.3074355721473694, + "learning_rate": 1.4716257330163316e-05, + "loss": 0.07079315185546875, + "step": 7740 + }, + { + "epoch": 0.5232526700013519, + "grad_norm": 0.20813210308551788, + "learning_rate": 1.4712958397265133e-05, + "loss": 0.0513763427734375, + "step": 7741 + }, + { + "epoch": 0.523320264972286, + "grad_norm": 0.9646549820899963, + "learning_rate": 1.4709659478255817e-05, + "loss": 0.13555908203125, + "step": 7742 + }, + { + "epoch": 0.5233878599432202, + "grad_norm": 0.862044632434845, + "learning_rate": 1.4706360573294971e-05, + "loss": 0.155303955078125, + "step": 7743 + }, + { + "epoch": 0.5234554549141544, + "grad_norm": 0.39095401763916016, + "learning_rate": 1.4703061682542232e-05, + "loss": 0.085174560546875, + "step": 7744 + }, + { + "epoch": 0.5235230498850886, + "grad_norm": 0.684272825717926, + "learning_rate": 1.4699762806157211e-05, + "loss": 0.130828857421875, + "step": 7745 + }, + { + "epoch": 0.5235906448560227, + "grad_norm": 0.2808794379234314, + "learning_rate": 1.4696463944299543e-05, + "loss": 0.04268836975097656, + "step": 7746 + }, + { + "epoch": 0.5236582398269569, + "grad_norm": 0.4417450428009033, + "learning_rate": 1.4693165097128825e-05, + "loss": 0.09027862548828125, + "step": 7747 + }, + { + "epoch": 0.523725834797891, + "grad_norm": 0.18796394765377045, + "learning_rate": 1.4689866264804697e-05, + "loss": 0.035678863525390625, + "step": 7748 + }, + { + "epoch": 0.5237934297688251, + "grad_norm": 0.3078998029232025, + "learning_rate": 1.4686567447486765e-05, + "loss": 0.046581268310546875, + "step": 7749 + }, + { + "epoch": 0.5238610247397594, + "grad_norm": 0.23525796830654144, + "learning_rate": 1.4683268645334644e-05, + "loss": 0.03702545166015625, + "step": 7750 + }, + { + "epoch": 0.5239286197106935, + "grad_norm": 0.8286679983139038, + "learning_rate": 1.4679969858507964e-05, + "loss": 0.149688720703125, + "step": 7751 + }, + { + "epoch": 0.5239962146816277, + "grad_norm": 1.110282063484192, + "learning_rate": 1.4676671087166325e-05, + "loss": 0.137359619140625, + "step": 7752 + }, + { + "epoch": 0.5240638096525618, + "grad_norm": 0.3236352801322937, + "learning_rate": 1.4673372331469353e-05, + "loss": 0.074249267578125, + "step": 7753 + }, + { + "epoch": 0.5241314046234961, + "grad_norm": 0.29651200771331787, + "learning_rate": 1.4670073591576657e-05, + "loss": 0.04320526123046875, + "step": 7754 + }, + { + "epoch": 0.5241989995944302, + "grad_norm": 0.31424668431282043, + "learning_rate": 1.4666774867647856e-05, + "loss": 0.061370849609375, + "step": 7755 + }, + { + "epoch": 0.5242665945653643, + "grad_norm": 1.2102360725402832, + "learning_rate": 1.4663476159842555e-05, + "loss": 0.212158203125, + "step": 7756 + }, + { + "epoch": 0.5243341895362985, + "grad_norm": 0.9368665814399719, + "learning_rate": 1.4660177468320372e-05, + "loss": 0.1425933837890625, + "step": 7757 + }, + { + "epoch": 0.5244017845072326, + "grad_norm": 1.5700418949127197, + "learning_rate": 1.4656878793240916e-05, + "loss": 0.207916259765625, + "step": 7758 + }, + { + "epoch": 0.5244693794781669, + "grad_norm": 0.986210823059082, + "learning_rate": 1.4653580134763801e-05, + "loss": 0.18878173828125, + "step": 7759 + }, + { + "epoch": 0.524536974449101, + "grad_norm": 1.3071950674057007, + "learning_rate": 1.4650281493048631e-05, + "loss": 0.17842864990234375, + "step": 7760 + }, + { + "epoch": 0.5246045694200352, + "grad_norm": 1.3438678979873657, + "learning_rate": 1.4646982868255015e-05, + "loss": 0.1901702880859375, + "step": 7761 + }, + { + "epoch": 0.5246721643909693, + "grad_norm": 0.8174515962600708, + "learning_rate": 1.464368426054257e-05, + "loss": 0.1868896484375, + "step": 7762 + }, + { + "epoch": 0.5247397593619034, + "grad_norm": 0.9908336997032166, + "learning_rate": 1.4640385670070887e-05, + "loss": 0.18499755859375, + "step": 7763 + }, + { + "epoch": 0.5248073543328377, + "grad_norm": 0.2098744809627533, + "learning_rate": 1.4637087096999591e-05, + "loss": 0.0508880615234375, + "step": 7764 + }, + { + "epoch": 0.5248749493037718, + "grad_norm": 0.8732942342758179, + "learning_rate": 1.4633788541488272e-05, + "loss": 0.1323699951171875, + "step": 7765 + }, + { + "epoch": 0.524942544274706, + "grad_norm": 0.5999093055725098, + "learning_rate": 1.4630490003696542e-05, + "loss": 0.175628662109375, + "step": 7766 + }, + { + "epoch": 0.5250101392456401, + "grad_norm": 0.7538950443267822, + "learning_rate": 1.4627191483784003e-05, + "loss": 0.11153411865234375, + "step": 7767 + }, + { + "epoch": 0.5250777342165743, + "grad_norm": 1.0105950832366943, + "learning_rate": 1.4623892981910261e-05, + "loss": 0.1547393798828125, + "step": 7768 + }, + { + "epoch": 0.5251453291875084, + "grad_norm": 0.551458477973938, + "learning_rate": 1.4620594498234912e-05, + "loss": 0.10400390625, + "step": 7769 + }, + { + "epoch": 0.5252129241584426, + "grad_norm": 0.827481210231781, + "learning_rate": 1.4617296032917564e-05, + "loss": 0.11339950561523438, + "step": 7770 + }, + { + "epoch": 0.5252805191293768, + "grad_norm": 0.5267122387886047, + "learning_rate": 1.4613997586117817e-05, + "loss": 0.1077117919921875, + "step": 7771 + }, + { + "epoch": 0.5253481141003109, + "grad_norm": 0.780246913433075, + "learning_rate": 1.4610699157995261e-05, + "loss": 0.15341567993164062, + "step": 7772 + }, + { + "epoch": 0.5254157090712451, + "grad_norm": 0.44317805767059326, + "learning_rate": 1.4607400748709502e-05, + "loss": 0.1100311279296875, + "step": 7773 + }, + { + "epoch": 0.5254833040421792, + "grad_norm": 0.9615404009819031, + "learning_rate": 1.4604102358420133e-05, + "loss": 0.1630096435546875, + "step": 7774 + }, + { + "epoch": 0.5255508990131135, + "grad_norm": 0.5808193683624268, + "learning_rate": 1.4600803987286764e-05, + "loss": 0.11433792114257812, + "step": 7775 + }, + { + "epoch": 0.5256184939840476, + "grad_norm": 0.9707137942314148, + "learning_rate": 1.4597505635468971e-05, + "loss": 0.1851959228515625, + "step": 7776 + }, + { + "epoch": 0.5256860889549817, + "grad_norm": 0.29826003313064575, + "learning_rate": 1.4594207303126367e-05, + "loss": 0.05898284912109375, + "step": 7777 + }, + { + "epoch": 0.5257536839259159, + "grad_norm": 1.0610177516937256, + "learning_rate": 1.459090899041853e-05, + "loss": 0.2459716796875, + "step": 7778 + }, + { + "epoch": 0.52582127889685, + "grad_norm": 1.111828327178955, + "learning_rate": 1.4587610697505062e-05, + "loss": 0.1700439453125, + "step": 7779 + }, + { + "epoch": 0.5258888738677843, + "grad_norm": 0.5917668342590332, + "learning_rate": 1.458431242454555e-05, + "loss": 0.09212875366210938, + "step": 7780 + }, + { + "epoch": 0.5259564688387184, + "grad_norm": 0.7016769051551819, + "learning_rate": 1.4581014171699597e-05, + "loss": 0.12277984619140625, + "step": 7781 + }, + { + "epoch": 0.5260240638096526, + "grad_norm": 0.3810853958129883, + "learning_rate": 1.4577715939126774e-05, + "loss": 0.05930328369140625, + "step": 7782 + }, + { + "epoch": 0.5260916587805867, + "grad_norm": 1.1406166553497314, + "learning_rate": 1.4574417726986687e-05, + "loss": 0.13297271728515625, + "step": 7783 + }, + { + "epoch": 0.5261592537515208, + "grad_norm": 0.9250016808509827, + "learning_rate": 1.4571119535438918e-05, + "loss": 0.1632537841796875, + "step": 7784 + }, + { + "epoch": 0.5262268487224551, + "grad_norm": 0.4322744905948639, + "learning_rate": 1.4567821364643048e-05, + "loss": 0.08097076416015625, + "step": 7785 + }, + { + "epoch": 0.5262944436933892, + "grad_norm": 0.8985788822174072, + "learning_rate": 1.456452321475867e-05, + "loss": 0.1244049072265625, + "step": 7786 + }, + { + "epoch": 0.5263620386643234, + "grad_norm": 0.3883446753025055, + "learning_rate": 1.4561225085945364e-05, + "loss": 0.04624176025390625, + "step": 7787 + }, + { + "epoch": 0.5264296336352575, + "grad_norm": 1.126214623451233, + "learning_rate": 1.4557926978362723e-05, + "loss": 0.15946197509765625, + "step": 7788 + }, + { + "epoch": 0.5264972286061917, + "grad_norm": 0.42213505506515503, + "learning_rate": 1.455462889217032e-05, + "loss": 0.076690673828125, + "step": 7789 + }, + { + "epoch": 0.5265648235771259, + "grad_norm": 0.4163946509361267, + "learning_rate": 1.4551330827527743e-05, + "loss": 0.0640411376953125, + "step": 7790 + }, + { + "epoch": 0.52663241854806, + "grad_norm": 0.31717079877853394, + "learning_rate": 1.4548032784594565e-05, + "loss": 0.060665130615234375, + "step": 7791 + }, + { + "epoch": 0.5267000135189942, + "grad_norm": 0.601576030254364, + "learning_rate": 1.454473476353038e-05, + "loss": 0.1312255859375, + "step": 7792 + }, + { + "epoch": 0.5267676084899283, + "grad_norm": 0.2824002504348755, + "learning_rate": 1.4541436764494753e-05, + "loss": 0.05629730224609375, + "step": 7793 + }, + { + "epoch": 0.5268352034608625, + "grad_norm": 0.23766116797924042, + "learning_rate": 1.4538138787647272e-05, + "loss": 0.04807090759277344, + "step": 7794 + }, + { + "epoch": 0.5269027984317967, + "grad_norm": 0.3792346715927124, + "learning_rate": 1.4534840833147507e-05, + "loss": 0.06293106079101562, + "step": 7795 + }, + { + "epoch": 0.5269703934027309, + "grad_norm": 0.617007851600647, + "learning_rate": 1.4531542901155032e-05, + "loss": 0.1086273193359375, + "step": 7796 + }, + { + "epoch": 0.527037988373665, + "grad_norm": 0.686079204082489, + "learning_rate": 1.4528244991829431e-05, + "loss": 0.131011962890625, + "step": 7797 + }, + { + "epoch": 0.5271055833445991, + "grad_norm": 0.4482971131801605, + "learning_rate": 1.4524947105330264e-05, + "loss": 0.08383560180664062, + "step": 7798 + }, + { + "epoch": 0.5271731783155333, + "grad_norm": 0.2522990107536316, + "learning_rate": 1.4521649241817113e-05, + "loss": 0.05178260803222656, + "step": 7799 + }, + { + "epoch": 0.5272407732864675, + "grad_norm": 0.2933029234409332, + "learning_rate": 1.4518351401449543e-05, + "loss": 0.07056427001953125, + "step": 7800 + }, + { + "epoch": 0.5273083682574017, + "grad_norm": 0.3001672327518463, + "learning_rate": 1.4515053584387137e-05, + "loss": 0.04324913024902344, + "step": 7801 + }, + { + "epoch": 0.5273759632283358, + "grad_norm": 0.43613603711128235, + "learning_rate": 1.4511755790789445e-05, + "loss": 0.066802978515625, + "step": 7802 + }, + { + "epoch": 0.52744355819927, + "grad_norm": 0.3631438910961151, + "learning_rate": 1.4508458020816048e-05, + "loss": 0.06880378723144531, + "step": 7803 + }, + { + "epoch": 0.5275111531702041, + "grad_norm": 0.6859033107757568, + "learning_rate": 1.4505160274626505e-05, + "loss": 0.140655517578125, + "step": 7804 + }, + { + "epoch": 0.5275787481411383, + "grad_norm": 0.5725837349891663, + "learning_rate": 1.4501862552380394e-05, + "loss": 0.11486053466796875, + "step": 7805 + }, + { + "epoch": 0.5276463431120725, + "grad_norm": 0.4707581698894501, + "learning_rate": 1.4498564854237266e-05, + "loss": 0.1052703857421875, + "step": 7806 + }, + { + "epoch": 0.5277139380830066, + "grad_norm": 0.6457346081733704, + "learning_rate": 1.4495267180356685e-05, + "loss": 0.113372802734375, + "step": 7807 + }, + { + "epoch": 0.5277815330539408, + "grad_norm": 2.074863910675049, + "learning_rate": 1.4491969530898225e-05, + "loss": 0.21869659423828125, + "step": 7808 + }, + { + "epoch": 0.5278491280248749, + "grad_norm": 0.683177649974823, + "learning_rate": 1.448867190602143e-05, + "loss": 0.09877777099609375, + "step": 7809 + }, + { + "epoch": 0.5279167229958092, + "grad_norm": 0.20157113671302795, + "learning_rate": 1.4485374305885875e-05, + "loss": 0.0343475341796875, + "step": 7810 + }, + { + "epoch": 0.5279843179667433, + "grad_norm": 0.8962563872337341, + "learning_rate": 1.4482076730651107e-05, + "loss": 0.14571380615234375, + "step": 7811 + }, + { + "epoch": 0.5280519129376774, + "grad_norm": 0.6978649497032166, + "learning_rate": 1.4478779180476689e-05, + "loss": 0.1509246826171875, + "step": 7812 + }, + { + "epoch": 0.5281195079086116, + "grad_norm": 0.6756827235221863, + "learning_rate": 1.4475481655522173e-05, + "loss": 0.1265106201171875, + "step": 7813 + }, + { + "epoch": 0.5281871028795457, + "grad_norm": 0.8115305304527283, + "learning_rate": 1.4472184155947122e-05, + "loss": 0.1583404541015625, + "step": 7814 + }, + { + "epoch": 0.52825469785048, + "grad_norm": 0.21494342386722565, + "learning_rate": 1.4468886681911079e-05, + "loss": 0.03903961181640625, + "step": 7815 + }, + { + "epoch": 0.5283222928214141, + "grad_norm": 0.8021060824394226, + "learning_rate": 1.4465589233573603e-05, + "loss": 0.1230926513671875, + "step": 7816 + }, + { + "epoch": 0.5283898877923483, + "grad_norm": 0.5801488161087036, + "learning_rate": 1.4462291811094241e-05, + "loss": 0.1216278076171875, + "step": 7817 + }, + { + "epoch": 0.5284574827632824, + "grad_norm": 0.7302011847496033, + "learning_rate": 1.4458994414632552e-05, + "loss": 0.1431732177734375, + "step": 7818 + }, + { + "epoch": 0.5285250777342165, + "grad_norm": 0.7044544816017151, + "learning_rate": 1.4455697044348073e-05, + "loss": 0.13448333740234375, + "step": 7819 + }, + { + "epoch": 0.5285926727051508, + "grad_norm": 0.5854066610336304, + "learning_rate": 1.4452399700400354e-05, + "loss": 0.14023590087890625, + "step": 7820 + }, + { + "epoch": 0.5286602676760849, + "grad_norm": 0.8943121433258057, + "learning_rate": 1.444910238294895e-05, + "loss": 0.1505126953125, + "step": 7821 + }, + { + "epoch": 0.5287278626470191, + "grad_norm": 1.2696588039398193, + "learning_rate": 1.444580509215339e-05, + "loss": 0.240936279296875, + "step": 7822 + }, + { + "epoch": 0.5287954576179532, + "grad_norm": 0.3829771876335144, + "learning_rate": 1.4442507828173235e-05, + "loss": 0.0940093994140625, + "step": 7823 + }, + { + "epoch": 0.5288630525888874, + "grad_norm": 0.6331086158752441, + "learning_rate": 1.443921059116801e-05, + "loss": 0.115264892578125, + "step": 7824 + }, + { + "epoch": 0.5289306475598216, + "grad_norm": 0.8299833536148071, + "learning_rate": 1.4435913381297269e-05, + "loss": 0.12799072265625, + "step": 7825 + }, + { + "epoch": 0.5289982425307557, + "grad_norm": 0.8248480558395386, + "learning_rate": 1.4432616198720541e-05, + "loss": 0.162689208984375, + "step": 7826 + }, + { + "epoch": 0.5290658375016899, + "grad_norm": 1.133908748626709, + "learning_rate": 1.442931904359738e-05, + "loss": 0.18404388427734375, + "step": 7827 + }, + { + "epoch": 0.529133432472624, + "grad_norm": 0.7553560137748718, + "learning_rate": 1.4426021916087303e-05, + "loss": 0.17816162109375, + "step": 7828 + }, + { + "epoch": 0.5292010274435582, + "grad_norm": 0.5731074810028076, + "learning_rate": 1.442272481634986e-05, + "loss": 0.1226806640625, + "step": 7829 + }, + { + "epoch": 0.5292686224144924, + "grad_norm": 0.23987752199172974, + "learning_rate": 1.4419427744544583e-05, + "loss": 0.04802703857421875, + "step": 7830 + }, + { + "epoch": 0.5293362173854266, + "grad_norm": 0.6760380268096924, + "learning_rate": 1.4416130700830996e-05, + "loss": 0.11360931396484375, + "step": 7831 + }, + { + "epoch": 0.5294038123563607, + "grad_norm": 0.6046381592750549, + "learning_rate": 1.441283368536864e-05, + "loss": 0.1412811279296875, + "step": 7832 + }, + { + "epoch": 0.5294714073272948, + "grad_norm": 0.8920596837997437, + "learning_rate": 1.4409536698317038e-05, + "loss": 0.11553573608398438, + "step": 7833 + }, + { + "epoch": 0.529539002298229, + "grad_norm": 1.0128817558288574, + "learning_rate": 1.4406239739835731e-05, + "loss": 0.1866607666015625, + "step": 7834 + }, + { + "epoch": 0.5296065972691631, + "grad_norm": 0.9588202238082886, + "learning_rate": 1.440294281008423e-05, + "loss": 0.174713134765625, + "step": 7835 + }, + { + "epoch": 0.5296741922400974, + "grad_norm": 1.1652169227600098, + "learning_rate": 1.439964590922207e-05, + "loss": 0.15837860107421875, + "step": 7836 + }, + { + "epoch": 0.5297417872110315, + "grad_norm": 0.31043365597724915, + "learning_rate": 1.4396349037408775e-05, + "loss": 0.05047607421875, + "step": 7837 + }, + { + "epoch": 0.5298093821819657, + "grad_norm": 0.43848752975463867, + "learning_rate": 1.4393052194803873e-05, + "loss": 0.08150482177734375, + "step": 7838 + }, + { + "epoch": 0.5298769771528998, + "grad_norm": 0.2620295584201813, + "learning_rate": 1.4389755381566873e-05, + "loss": 0.033718109130859375, + "step": 7839 + }, + { + "epoch": 0.529944572123834, + "grad_norm": 0.36080726981163025, + "learning_rate": 1.438645859785731e-05, + "loss": 0.0704345703125, + "step": 7840 + }, + { + "epoch": 0.5300121670947682, + "grad_norm": 0.796039879322052, + "learning_rate": 1.4383161843834693e-05, + "loss": 0.1392669677734375, + "step": 7841 + }, + { + "epoch": 0.5300797620657023, + "grad_norm": 0.4519433081150055, + "learning_rate": 1.4379865119658535e-05, + "loss": 0.07181930541992188, + "step": 7842 + }, + { + "epoch": 0.5301473570366365, + "grad_norm": 0.7429225444793701, + "learning_rate": 1.4376568425488371e-05, + "loss": 0.12358856201171875, + "step": 7843 + }, + { + "epoch": 0.5302149520075706, + "grad_norm": 0.35283660888671875, + "learning_rate": 1.4373271761483694e-05, + "loss": 0.069580078125, + "step": 7844 + }, + { + "epoch": 0.5302825469785049, + "grad_norm": 0.8035763502120972, + "learning_rate": 1.436997512780403e-05, + "loss": 0.11655044555664062, + "step": 7845 + }, + { + "epoch": 0.530350141949439, + "grad_norm": 0.3440534174442291, + "learning_rate": 1.4366678524608886e-05, + "loss": 0.08798980712890625, + "step": 7846 + }, + { + "epoch": 0.5304177369203731, + "grad_norm": 0.5429185032844543, + "learning_rate": 1.4363381952057779e-05, + "loss": 0.14217376708984375, + "step": 7847 + }, + { + "epoch": 0.5304853318913073, + "grad_norm": 0.27032455801963806, + "learning_rate": 1.4360085410310206e-05, + "loss": 0.047374725341796875, + "step": 7848 + }, + { + "epoch": 0.5305529268622414, + "grad_norm": 0.36750951409339905, + "learning_rate": 1.4356788899525681e-05, + "loss": 0.08596420288085938, + "step": 7849 + }, + { + "epoch": 0.5306205218331757, + "grad_norm": 0.6750341057777405, + "learning_rate": 1.4353492419863709e-05, + "loss": 0.1062164306640625, + "step": 7850 + }, + { + "epoch": 0.5306881168041098, + "grad_norm": 0.5903700590133667, + "learning_rate": 1.4350195971483802e-05, + "loss": 0.10321807861328125, + "step": 7851 + }, + { + "epoch": 0.5307557117750439, + "grad_norm": 0.5651221871376038, + "learning_rate": 1.4346899554545446e-05, + "loss": 0.14791107177734375, + "step": 7852 + }, + { + "epoch": 0.5308233067459781, + "grad_norm": 0.21390792727470398, + "learning_rate": 1.4343603169208154e-05, + "loss": 0.019992828369140625, + "step": 7853 + }, + { + "epoch": 0.5308909017169122, + "grad_norm": 0.9246128797531128, + "learning_rate": 1.4340306815631427e-05, + "loss": 0.1541290283203125, + "step": 7854 + }, + { + "epoch": 0.5309584966878464, + "grad_norm": 0.6882163286209106, + "learning_rate": 1.4337010493974752e-05, + "loss": 0.12290191650390625, + "step": 7855 + }, + { + "epoch": 0.5310260916587806, + "grad_norm": 0.30268606543540955, + "learning_rate": 1.433371420439764e-05, + "loss": 0.0498199462890625, + "step": 7856 + }, + { + "epoch": 0.5310936866297148, + "grad_norm": 0.36273282766342163, + "learning_rate": 1.433041794705957e-05, + "loss": 0.055446624755859375, + "step": 7857 + }, + { + "epoch": 0.5311612816006489, + "grad_norm": 0.1967957466840744, + "learning_rate": 1.4327121722120048e-05, + "loss": 0.0345306396484375, + "step": 7858 + }, + { + "epoch": 0.531228876571583, + "grad_norm": 0.8530599474906921, + "learning_rate": 1.4323825529738559e-05, + "loss": 0.1610260009765625, + "step": 7859 + }, + { + "epoch": 0.5312964715425172, + "grad_norm": 0.5336583852767944, + "learning_rate": 1.43205293700746e-05, + "loss": 0.09555816650390625, + "step": 7860 + }, + { + "epoch": 0.5313640665134514, + "grad_norm": 0.9822760820388794, + "learning_rate": 1.4317233243287654e-05, + "loss": 0.1266326904296875, + "step": 7861 + }, + { + "epoch": 0.5314316614843856, + "grad_norm": 1.256249189376831, + "learning_rate": 1.4313937149537209e-05, + "loss": 0.14117431640625, + "step": 7862 + }, + { + "epoch": 0.5314992564553197, + "grad_norm": 0.3200152516365051, + "learning_rate": 1.4310641088982748e-05, + "loss": 0.04699516296386719, + "step": 7863 + }, + { + "epoch": 0.5315668514262539, + "grad_norm": 0.6832531690597534, + "learning_rate": 1.4307345061783766e-05, + "loss": 0.130157470703125, + "step": 7864 + }, + { + "epoch": 0.531634446397188, + "grad_norm": 0.17677423357963562, + "learning_rate": 1.4304049068099736e-05, + "loss": 0.02629852294921875, + "step": 7865 + }, + { + "epoch": 0.5317020413681222, + "grad_norm": 0.9411543011665344, + "learning_rate": 1.4300753108090134e-05, + "loss": 0.14163970947265625, + "step": 7866 + }, + { + "epoch": 0.5317696363390564, + "grad_norm": 0.5398110747337341, + "learning_rate": 1.4297457181914453e-05, + "loss": 0.050601959228515625, + "step": 7867 + }, + { + "epoch": 0.5318372313099905, + "grad_norm": 0.41626760363578796, + "learning_rate": 1.4294161289732156e-05, + "loss": 0.07492828369140625, + "step": 7868 + }, + { + "epoch": 0.5319048262809247, + "grad_norm": 0.6301937699317932, + "learning_rate": 1.4290865431702732e-05, + "loss": 0.128662109375, + "step": 7869 + }, + { + "epoch": 0.5319724212518588, + "grad_norm": 0.7022750973701477, + "learning_rate": 1.428756960798564e-05, + "loss": 0.17254638671875, + "step": 7870 + }, + { + "epoch": 0.5320400162227931, + "grad_norm": 0.25050100684165955, + "learning_rate": 1.4284273818740366e-05, + "loss": 0.051849365234375, + "step": 7871 + }, + { + "epoch": 0.5321076111937272, + "grad_norm": 0.894612729549408, + "learning_rate": 1.4280978064126372e-05, + "loss": 0.10898590087890625, + "step": 7872 + }, + { + "epoch": 0.5321752061646613, + "grad_norm": 0.27655917406082153, + "learning_rate": 1.4277682344303138e-05, + "loss": 0.042827606201171875, + "step": 7873 + }, + { + "epoch": 0.5322428011355955, + "grad_norm": 0.6795775890350342, + "learning_rate": 1.4274386659430118e-05, + "loss": 0.123626708984375, + "step": 7874 + }, + { + "epoch": 0.5323103961065296, + "grad_norm": 0.8388192653656006, + "learning_rate": 1.4271091009666786e-05, + "loss": 0.14296722412109375, + "step": 7875 + }, + { + "epoch": 0.5323779910774639, + "grad_norm": 0.40566474199295044, + "learning_rate": 1.42677953951726e-05, + "loss": 0.0597991943359375, + "step": 7876 + }, + { + "epoch": 0.532445586048398, + "grad_norm": 0.9278604388237, + "learning_rate": 1.4264499816107035e-05, + "loss": 0.1143951416015625, + "step": 7877 + }, + { + "epoch": 0.5325131810193322, + "grad_norm": 0.46247756481170654, + "learning_rate": 1.4261204272629539e-05, + "loss": 0.07605743408203125, + "step": 7878 + }, + { + "epoch": 0.5325807759902663, + "grad_norm": 0.2978673279285431, + "learning_rate": 1.425790876489957e-05, + "loss": 0.0712890625, + "step": 7879 + }, + { + "epoch": 0.5326483709612004, + "grad_norm": 0.3332079350948334, + "learning_rate": 1.42546132930766e-05, + "loss": 0.05359649658203125, + "step": 7880 + }, + { + "epoch": 0.5327159659321347, + "grad_norm": 0.47237521409988403, + "learning_rate": 1.4251317857320064e-05, + "loss": 0.09473419189453125, + "step": 7881 + }, + { + "epoch": 0.5327835609030688, + "grad_norm": 0.3904496431350708, + "learning_rate": 1.424802245778943e-05, + "loss": 0.07086753845214844, + "step": 7882 + }, + { + "epoch": 0.532851155874003, + "grad_norm": 0.6029988527297974, + "learning_rate": 1.4244727094644144e-05, + "loss": 0.120025634765625, + "step": 7883 + }, + { + "epoch": 0.5329187508449371, + "grad_norm": 0.43635034561157227, + "learning_rate": 1.4241431768043666e-05, + "loss": 0.0706787109375, + "step": 7884 + }, + { + "epoch": 0.5329863458158713, + "grad_norm": 0.3957904279232025, + "learning_rate": 1.4238136478147427e-05, + "loss": 0.06231880187988281, + "step": 7885 + }, + { + "epoch": 0.5330539407868055, + "grad_norm": 0.46225330233573914, + "learning_rate": 1.4234841225114891e-05, + "loss": 0.116424560546875, + "step": 7886 + }, + { + "epoch": 0.5331215357577396, + "grad_norm": 0.4332607388496399, + "learning_rate": 1.4231546009105489e-05, + "loss": 0.09407806396484375, + "step": 7887 + }, + { + "epoch": 0.5331891307286738, + "grad_norm": 0.7623059749603271, + "learning_rate": 1.4228250830278673e-05, + "loss": 0.14302825927734375, + "step": 7888 + }, + { + "epoch": 0.5332567256996079, + "grad_norm": 0.8003389835357666, + "learning_rate": 1.4224955688793885e-05, + "loss": 0.10493850708007812, + "step": 7889 + }, + { + "epoch": 0.5333243206705421, + "grad_norm": 0.3111593425273895, + "learning_rate": 1.4221660584810553e-05, + "loss": 0.04326629638671875, + "step": 7890 + }, + { + "epoch": 0.5333919156414763, + "grad_norm": 0.2327532321214676, + "learning_rate": 1.4218365518488127e-05, + "loss": 0.0504302978515625, + "step": 7891 + }, + { + "epoch": 0.5334595106124105, + "grad_norm": 0.3810388445854187, + "learning_rate": 1.4215070489986034e-05, + "loss": 0.0555267333984375, + "step": 7892 + }, + { + "epoch": 0.5335271055833446, + "grad_norm": 0.6676393151283264, + "learning_rate": 1.4211775499463718e-05, + "loss": 0.1242218017578125, + "step": 7893 + }, + { + "epoch": 0.5335947005542787, + "grad_norm": 0.5850238800048828, + "learning_rate": 1.4208480547080601e-05, + "loss": 0.0958099365234375, + "step": 7894 + }, + { + "epoch": 0.5336622955252129, + "grad_norm": 0.5644176602363586, + "learning_rate": 1.4205185632996117e-05, + "loss": 0.1407318115234375, + "step": 7895 + }, + { + "epoch": 0.533729890496147, + "grad_norm": 0.26148465275764465, + "learning_rate": 1.4201890757369695e-05, + "loss": 0.05144500732421875, + "step": 7896 + }, + { + "epoch": 0.5337974854670813, + "grad_norm": 0.3328421115875244, + "learning_rate": 1.4198595920360769e-05, + "loss": 0.05919647216796875, + "step": 7897 + }, + { + "epoch": 0.5338650804380154, + "grad_norm": 1.2244189977645874, + "learning_rate": 1.4195301122128746e-05, + "loss": 0.11992645263671875, + "step": 7898 + }, + { + "epoch": 0.5339326754089496, + "grad_norm": 1.0835318565368652, + "learning_rate": 1.4192006362833065e-05, + "loss": 0.2126007080078125, + "step": 7899 + }, + { + "epoch": 0.5340002703798837, + "grad_norm": 1.4450384378433228, + "learning_rate": 1.4188711642633142e-05, + "loss": 0.16590499877929688, + "step": 7900 + }, + { + "epoch": 0.5340678653508178, + "grad_norm": 0.9245631098747253, + "learning_rate": 1.418541696168839e-05, + "loss": 0.20888519287109375, + "step": 7901 + }, + { + "epoch": 0.5341354603217521, + "grad_norm": 0.9935156106948853, + "learning_rate": 1.4182122320158238e-05, + "loss": 0.177032470703125, + "step": 7902 + }, + { + "epoch": 0.5342030552926862, + "grad_norm": 1.00603187084198, + "learning_rate": 1.4178827718202088e-05, + "loss": 0.157379150390625, + "step": 7903 + }, + { + "epoch": 0.5342706502636204, + "grad_norm": 0.14242970943450928, + "learning_rate": 1.4175533155979363e-05, + "loss": 0.016521453857421875, + "step": 7904 + }, + { + "epoch": 0.5343382452345545, + "grad_norm": 0.915583074092865, + "learning_rate": 1.4172238633649468e-05, + "loss": 0.17061614990234375, + "step": 7905 + }, + { + "epoch": 0.5344058402054888, + "grad_norm": 1.3846806287765503, + "learning_rate": 1.4168944151371821e-05, + "loss": 0.2015380859375, + "step": 7906 + }, + { + "epoch": 0.5344734351764229, + "grad_norm": 0.5794491767883301, + "learning_rate": 1.416564970930582e-05, + "loss": 0.113494873046875, + "step": 7907 + }, + { + "epoch": 0.534541030147357, + "grad_norm": 0.7246145606040955, + "learning_rate": 1.4162355307610876e-05, + "loss": 0.14591217041015625, + "step": 7908 + }, + { + "epoch": 0.5346086251182912, + "grad_norm": 0.27476754784584045, + "learning_rate": 1.4159060946446389e-05, + "loss": 0.05831146240234375, + "step": 7909 + }, + { + "epoch": 0.5346762200892253, + "grad_norm": 0.8639835715293884, + "learning_rate": 1.4155766625971769e-05, + "loss": 0.1394500732421875, + "step": 7910 + }, + { + "epoch": 0.5347438150601596, + "grad_norm": 1.0059317350387573, + "learning_rate": 1.4152472346346404e-05, + "loss": 0.193206787109375, + "step": 7911 + }, + { + "epoch": 0.5348114100310937, + "grad_norm": 0.26436588168144226, + "learning_rate": 1.41491781077297e-05, + "loss": 0.05559539794921875, + "step": 7912 + }, + { + "epoch": 0.5348790050020279, + "grad_norm": 0.42670196294784546, + "learning_rate": 1.4145883910281055e-05, + "loss": 0.0394744873046875, + "step": 7913 + }, + { + "epoch": 0.534946599972962, + "grad_norm": 0.5797026753425598, + "learning_rate": 1.4142589754159851e-05, + "loss": 0.09453201293945312, + "step": 7914 + }, + { + "epoch": 0.5350141949438961, + "grad_norm": 0.7149802446365356, + "learning_rate": 1.4139295639525494e-05, + "loss": 0.1450042724609375, + "step": 7915 + }, + { + "epoch": 0.5350817899148304, + "grad_norm": 0.4319196045398712, + "learning_rate": 1.413600156653736e-05, + "loss": 0.08562469482421875, + "step": 7916 + }, + { + "epoch": 0.5351493848857645, + "grad_norm": 1.383238434791565, + "learning_rate": 1.4132707535354845e-05, + "loss": 0.1354217529296875, + "step": 7917 + }, + { + "epoch": 0.5352169798566987, + "grad_norm": 0.2472517043352127, + "learning_rate": 1.4129413546137332e-05, + "loss": 0.038021087646484375, + "step": 7918 + }, + { + "epoch": 0.5352845748276328, + "grad_norm": 0.44345998764038086, + "learning_rate": 1.4126119599044212e-05, + "loss": 0.11224746704101562, + "step": 7919 + }, + { + "epoch": 0.535352169798567, + "grad_norm": 0.9530771970748901, + "learning_rate": 1.4122825694234853e-05, + "loss": 0.235260009765625, + "step": 7920 + }, + { + "epoch": 0.5354197647695011, + "grad_norm": 0.8044854998588562, + "learning_rate": 1.4119531831868646e-05, + "loss": 0.1689300537109375, + "step": 7921 + }, + { + "epoch": 0.5354873597404353, + "grad_norm": 0.8218318819999695, + "learning_rate": 1.4116238012104959e-05, + "loss": 0.167083740234375, + "step": 7922 + }, + { + "epoch": 0.5355549547113695, + "grad_norm": 0.16477683186531067, + "learning_rate": 1.4112944235103182e-05, + "loss": 0.025241851806640625, + "step": 7923 + }, + { + "epoch": 0.5356225496823036, + "grad_norm": 0.4455738663673401, + "learning_rate": 1.4109650501022675e-05, + "loss": 0.08040618896484375, + "step": 7924 + }, + { + "epoch": 0.5356901446532378, + "grad_norm": 0.3299295902252197, + "learning_rate": 1.410635681002281e-05, + "loss": 0.05389404296875, + "step": 7925 + }, + { + "epoch": 0.535757739624172, + "grad_norm": 1.2049036026000977, + "learning_rate": 1.4103063162262967e-05, + "loss": 0.18685150146484375, + "step": 7926 + }, + { + "epoch": 0.5358253345951062, + "grad_norm": 0.3160715103149414, + "learning_rate": 1.40997695579025e-05, + "loss": 0.0731353759765625, + "step": 7927 + }, + { + "epoch": 0.5358929295660403, + "grad_norm": 0.3283727169036865, + "learning_rate": 1.4096475997100785e-05, + "loss": 0.0612945556640625, + "step": 7928 + }, + { + "epoch": 0.5359605245369744, + "grad_norm": 0.6606415510177612, + "learning_rate": 1.4093182480017176e-05, + "loss": 0.093902587890625, + "step": 7929 + }, + { + "epoch": 0.5360281195079086, + "grad_norm": 0.4354439079761505, + "learning_rate": 1.4089889006811045e-05, + "loss": 0.1128997802734375, + "step": 7930 + }, + { + "epoch": 0.5360957144788427, + "grad_norm": 0.86867356300354, + "learning_rate": 1.4086595577641736e-05, + "loss": 0.12420845031738281, + "step": 7931 + }, + { + "epoch": 0.536163309449777, + "grad_norm": 0.23737339675426483, + "learning_rate": 1.408330219266862e-05, + "loss": 0.038700103759765625, + "step": 7932 + }, + { + "epoch": 0.5362309044207111, + "grad_norm": 1.199877381324768, + "learning_rate": 1.4080008852051037e-05, + "loss": 0.213226318359375, + "step": 7933 + }, + { + "epoch": 0.5362984993916453, + "grad_norm": 0.6472064256668091, + "learning_rate": 1.4076715555948353e-05, + "loss": 0.10913848876953125, + "step": 7934 + }, + { + "epoch": 0.5363660943625794, + "grad_norm": 0.24325579404830933, + "learning_rate": 1.4073422304519915e-05, + "loss": 0.0391387939453125, + "step": 7935 + }, + { + "epoch": 0.5364336893335135, + "grad_norm": 0.798977255821228, + "learning_rate": 1.4070129097925061e-05, + "loss": 0.1095123291015625, + "step": 7936 + }, + { + "epoch": 0.5365012843044478, + "grad_norm": 0.6458960175514221, + "learning_rate": 1.4066835936323147e-05, + "loss": 0.1405792236328125, + "step": 7937 + }, + { + "epoch": 0.5365688792753819, + "grad_norm": 0.40050944685935974, + "learning_rate": 1.406354281987351e-05, + "loss": 0.0773773193359375, + "step": 7938 + }, + { + "epoch": 0.5366364742463161, + "grad_norm": 0.1380126178264618, + "learning_rate": 1.4060249748735503e-05, + "loss": 0.030162811279296875, + "step": 7939 + }, + { + "epoch": 0.5367040692172502, + "grad_norm": 0.39146488904953003, + "learning_rate": 1.4056956723068451e-05, + "loss": 0.056396484375, + "step": 7940 + }, + { + "epoch": 0.5367716641881844, + "grad_norm": 0.27453404664993286, + "learning_rate": 1.40536637430317e-05, + "loss": 0.06260299682617188, + "step": 7941 + }, + { + "epoch": 0.5368392591591186, + "grad_norm": 0.6730799078941345, + "learning_rate": 1.405037080878458e-05, + "loss": 0.14865875244140625, + "step": 7942 + }, + { + "epoch": 0.5369068541300527, + "grad_norm": 0.6174188852310181, + "learning_rate": 1.4047077920486432e-05, + "loss": 0.09447288513183594, + "step": 7943 + }, + { + "epoch": 0.5369744491009869, + "grad_norm": 0.4815419912338257, + "learning_rate": 1.4043785078296573e-05, + "loss": 0.0825958251953125, + "step": 7944 + }, + { + "epoch": 0.537042044071921, + "grad_norm": 0.7334542274475098, + "learning_rate": 1.4040492282374342e-05, + "loss": 0.12614822387695312, + "step": 7945 + }, + { + "epoch": 0.5371096390428552, + "grad_norm": 0.2944069802761078, + "learning_rate": 1.403719953287906e-05, + "loss": 0.043243408203125, + "step": 7946 + }, + { + "epoch": 0.5371772340137894, + "grad_norm": 0.7237251996994019, + "learning_rate": 1.4033906829970056e-05, + "loss": 0.200592041015625, + "step": 7947 + }, + { + "epoch": 0.5372448289847236, + "grad_norm": 0.28858450055122375, + "learning_rate": 1.403061417380665e-05, + "loss": 0.0568084716796875, + "step": 7948 + }, + { + "epoch": 0.5373124239556577, + "grad_norm": 1.1991513967514038, + "learning_rate": 1.402732156454815e-05, + "loss": 0.19879150390625, + "step": 7949 + }, + { + "epoch": 0.5373800189265918, + "grad_norm": 0.7208271622657776, + "learning_rate": 1.4024029002353887e-05, + "loss": 0.169708251953125, + "step": 7950 + }, + { + "epoch": 0.537447613897526, + "grad_norm": 0.4227520823478699, + "learning_rate": 1.4020736487383166e-05, + "loss": 0.1191253662109375, + "step": 7951 + }, + { + "epoch": 0.5375152088684602, + "grad_norm": 0.5658390522003174, + "learning_rate": 1.401744401979531e-05, + "loss": 0.1152191162109375, + "step": 7952 + }, + { + "epoch": 0.5375828038393944, + "grad_norm": 1.0327955484390259, + "learning_rate": 1.4014151599749618e-05, + "loss": 0.16485595703125, + "step": 7953 + }, + { + "epoch": 0.5376503988103285, + "grad_norm": 0.6284570097923279, + "learning_rate": 1.4010859227405405e-05, + "loss": 0.1346893310546875, + "step": 7954 + }, + { + "epoch": 0.5377179937812627, + "grad_norm": 0.5999677181243896, + "learning_rate": 1.4007566902921968e-05, + "loss": 0.11419677734375, + "step": 7955 + }, + { + "epoch": 0.5377855887521968, + "grad_norm": 0.3955070376396179, + "learning_rate": 1.4004274626458627e-05, + "loss": 0.0858917236328125, + "step": 7956 + }, + { + "epoch": 0.537853183723131, + "grad_norm": 0.685329258441925, + "learning_rate": 1.4000982398174663e-05, + "loss": 0.1248016357421875, + "step": 7957 + }, + { + "epoch": 0.5379207786940652, + "grad_norm": 1.0791070461273193, + "learning_rate": 1.3997690218229386e-05, + "loss": 0.20831298828125, + "step": 7958 + }, + { + "epoch": 0.5379883736649993, + "grad_norm": 0.2417791336774826, + "learning_rate": 1.3994398086782095e-05, + "loss": 0.04335784912109375, + "step": 7959 + }, + { + "epoch": 0.5380559686359335, + "grad_norm": 0.8970707058906555, + "learning_rate": 1.399110600399207e-05, + "loss": 0.126953125, + "step": 7960 + }, + { + "epoch": 0.5381235636068676, + "grad_norm": 0.2798116207122803, + "learning_rate": 1.3987813970018618e-05, + "loss": 0.0762939453125, + "step": 7961 + }, + { + "epoch": 0.5381911585778019, + "grad_norm": 0.2925916910171509, + "learning_rate": 1.3984521985021015e-05, + "loss": 0.037975311279296875, + "step": 7962 + }, + { + "epoch": 0.538258753548736, + "grad_norm": 0.9886841177940369, + "learning_rate": 1.3981230049158556e-05, + "loss": 0.118804931640625, + "step": 7963 + }, + { + "epoch": 0.5383263485196701, + "grad_norm": 1.2565571069717407, + "learning_rate": 1.397793816259052e-05, + "loss": 0.1735992431640625, + "step": 7964 + }, + { + "epoch": 0.5383939434906043, + "grad_norm": 0.7842616438865662, + "learning_rate": 1.3974646325476197e-05, + "loss": 0.12042999267578125, + "step": 7965 + }, + { + "epoch": 0.5384615384615384, + "grad_norm": 0.33730342984199524, + "learning_rate": 1.3971354537974857e-05, + "loss": 0.069549560546875, + "step": 7966 + }, + { + "epoch": 0.5385291334324727, + "grad_norm": 1.5100624561309814, + "learning_rate": 1.3968062800245783e-05, + "loss": 0.207122802734375, + "step": 7967 + }, + { + "epoch": 0.5385967284034068, + "grad_norm": 0.2892796993255615, + "learning_rate": 1.3964771112448246e-05, + "loss": 0.042639732360839844, + "step": 7968 + }, + { + "epoch": 0.538664323374341, + "grad_norm": 0.8489699959754944, + "learning_rate": 1.3961479474741527e-05, + "loss": 0.15850830078125, + "step": 7969 + }, + { + "epoch": 0.5387319183452751, + "grad_norm": 1.560918927192688, + "learning_rate": 1.3958187887284885e-05, + "loss": 0.20648193359375, + "step": 7970 + }, + { + "epoch": 0.5387995133162092, + "grad_norm": 0.4244653284549713, + "learning_rate": 1.3954896350237592e-05, + "loss": 0.06064414978027344, + "step": 7971 + }, + { + "epoch": 0.5388671082871435, + "grad_norm": 0.27052977681159973, + "learning_rate": 1.3951604863758915e-05, + "loss": 0.0626373291015625, + "step": 7972 + }, + { + "epoch": 0.5389347032580776, + "grad_norm": 0.8650408983230591, + "learning_rate": 1.3948313428008112e-05, + "loss": 0.13382720947265625, + "step": 7973 + }, + { + "epoch": 0.5390022982290118, + "grad_norm": 0.3710451126098633, + "learning_rate": 1.3945022043144446e-05, + "loss": 0.07556915283203125, + "step": 7974 + }, + { + "epoch": 0.5390698931999459, + "grad_norm": 0.530544638633728, + "learning_rate": 1.3941730709327173e-05, + "loss": 0.07806396484375, + "step": 7975 + }, + { + "epoch": 0.5391374881708801, + "grad_norm": 0.22228363156318665, + "learning_rate": 1.3938439426715556e-05, + "loss": 0.04424285888671875, + "step": 7976 + }, + { + "epoch": 0.5392050831418143, + "grad_norm": 0.9667763710021973, + "learning_rate": 1.3935148195468834e-05, + "loss": 0.09967041015625, + "step": 7977 + }, + { + "epoch": 0.5392726781127484, + "grad_norm": 0.5112848281860352, + "learning_rate": 1.3931857015746272e-05, + "loss": 0.08945465087890625, + "step": 7978 + }, + { + "epoch": 0.5393402730836826, + "grad_norm": 0.9108312726020813, + "learning_rate": 1.3928565887707102e-05, + "loss": 0.1956634521484375, + "step": 7979 + }, + { + "epoch": 0.5394078680546167, + "grad_norm": 0.8394625186920166, + "learning_rate": 1.3925274811510584e-05, + "loss": 0.13836669921875, + "step": 7980 + }, + { + "epoch": 0.5394754630255509, + "grad_norm": 0.6410658359527588, + "learning_rate": 1.3921983787315947e-05, + "loss": 0.1425628662109375, + "step": 7981 + }, + { + "epoch": 0.539543057996485, + "grad_norm": 0.2592678666114807, + "learning_rate": 1.3918692815282448e-05, + "loss": 0.030672073364257812, + "step": 7982 + }, + { + "epoch": 0.5396106529674192, + "grad_norm": 0.1860521286725998, + "learning_rate": 1.391540189556931e-05, + "loss": 0.028173446655273438, + "step": 7983 + }, + { + "epoch": 0.5396782479383534, + "grad_norm": 0.5105587244033813, + "learning_rate": 1.3912111028335771e-05, + "loss": 0.12635421752929688, + "step": 7984 + }, + { + "epoch": 0.5397458429092875, + "grad_norm": 0.4287429451942444, + "learning_rate": 1.3908820213741074e-05, + "loss": 0.0968780517578125, + "step": 7985 + }, + { + "epoch": 0.5398134378802217, + "grad_norm": 1.128154993057251, + "learning_rate": 1.3905529451944433e-05, + "loss": 0.167327880859375, + "step": 7986 + }, + { + "epoch": 0.5398810328511559, + "grad_norm": 0.3637417256832123, + "learning_rate": 1.3902238743105087e-05, + "loss": 0.07372665405273438, + "step": 7987 + }, + { + "epoch": 0.5399486278220901, + "grad_norm": 0.3077141046524048, + "learning_rate": 1.3898948087382255e-05, + "loss": 0.0604248046875, + "step": 7988 + }, + { + "epoch": 0.5400162227930242, + "grad_norm": 0.23154781758785248, + "learning_rate": 1.3895657484935167e-05, + "loss": 0.02567291259765625, + "step": 7989 + }, + { + "epoch": 0.5400838177639583, + "grad_norm": 0.295246958732605, + "learning_rate": 1.389236693592303e-05, + "loss": 0.0543212890625, + "step": 7990 + }, + { + "epoch": 0.5401514127348925, + "grad_norm": 0.9949105978012085, + "learning_rate": 1.3889076440505078e-05, + "loss": 0.24041748046875, + "step": 7991 + }, + { + "epoch": 0.5402190077058266, + "grad_norm": 0.30308282375335693, + "learning_rate": 1.3885785998840505e-05, + "loss": 0.048735618591308594, + "step": 7992 + }, + { + "epoch": 0.5402866026767609, + "grad_norm": 0.8469952344894409, + "learning_rate": 1.3882495611088546e-05, + "loss": 0.1365966796875, + "step": 7993 + }, + { + "epoch": 0.540354197647695, + "grad_norm": 0.790163516998291, + "learning_rate": 1.3879205277408399e-05, + "loss": 0.18902587890625, + "step": 7994 + }, + { + "epoch": 0.5404217926186292, + "grad_norm": 0.2964572310447693, + "learning_rate": 1.3875914997959264e-05, + "loss": 0.07635498046875, + "step": 7995 + }, + { + "epoch": 0.5404893875895633, + "grad_norm": 0.8240132927894592, + "learning_rate": 1.3872624772900353e-05, + "loss": 0.1706695556640625, + "step": 7996 + }, + { + "epoch": 0.5405569825604974, + "grad_norm": 0.1850089132785797, + "learning_rate": 1.3869334602390866e-05, + "loss": 0.028921127319335938, + "step": 7997 + }, + { + "epoch": 0.5406245775314317, + "grad_norm": 0.47288623452186584, + "learning_rate": 1.3866044486590009e-05, + "loss": 0.09357452392578125, + "step": 7998 + }, + { + "epoch": 0.5406921725023658, + "grad_norm": 0.22795337438583374, + "learning_rate": 1.3862754425656963e-05, + "loss": 0.034793853759765625, + "step": 7999 + }, + { + "epoch": 0.5407597674733, + "grad_norm": 0.44087594747543335, + "learning_rate": 1.3859464419750936e-05, + "loss": 0.08599853515625, + "step": 8000 + }, + { + "epoch": 0.5408273624442341, + "grad_norm": 0.20032966136932373, + "learning_rate": 1.3856174469031108e-05, + "loss": 0.03066253662109375, + "step": 8001 + }, + { + "epoch": 0.5408949574151684, + "grad_norm": 0.6611281037330627, + "learning_rate": 1.385288457365668e-05, + "loss": 0.105499267578125, + "step": 8002 + }, + { + "epoch": 0.5409625523861025, + "grad_norm": 0.2949441373348236, + "learning_rate": 1.3849594733786825e-05, + "loss": 0.06093597412109375, + "step": 8003 + }, + { + "epoch": 0.5410301473570366, + "grad_norm": 0.5381643176078796, + "learning_rate": 1.3846304949580733e-05, + "loss": 0.108856201171875, + "step": 8004 + }, + { + "epoch": 0.5410977423279708, + "grad_norm": 1.3359957933425903, + "learning_rate": 1.3843015221197586e-05, + "loss": 0.1993255615234375, + "step": 8005 + }, + { + "epoch": 0.5411653372989049, + "grad_norm": 0.49135956168174744, + "learning_rate": 1.383972554879655e-05, + "loss": 0.08465194702148438, + "step": 8006 + }, + { + "epoch": 0.5412329322698392, + "grad_norm": 0.744743824005127, + "learning_rate": 1.3836435932536816e-05, + "loss": 0.137054443359375, + "step": 8007 + }, + { + "epoch": 0.5413005272407733, + "grad_norm": 0.7822222709655762, + "learning_rate": 1.3833146372577539e-05, + "loss": 0.13474273681640625, + "step": 8008 + }, + { + "epoch": 0.5413681222117075, + "grad_norm": 0.9682967662811279, + "learning_rate": 1.3829856869077902e-05, + "loss": 0.12679290771484375, + "step": 8009 + }, + { + "epoch": 0.5414357171826416, + "grad_norm": 1.0126307010650635, + "learning_rate": 1.3826567422197063e-05, + "loss": 0.14025115966796875, + "step": 8010 + }, + { + "epoch": 0.5415033121535757, + "grad_norm": 0.8352901339530945, + "learning_rate": 1.3823278032094195e-05, + "loss": 0.1382598876953125, + "step": 8011 + }, + { + "epoch": 0.54157090712451, + "grad_norm": 0.27215853333473206, + "learning_rate": 1.381998869892845e-05, + "loss": 0.03922271728515625, + "step": 8012 + }, + { + "epoch": 0.5416385020954441, + "grad_norm": 0.4145391583442688, + "learning_rate": 1.3816699422858991e-05, + "loss": 0.06168365478515625, + "step": 8013 + }, + { + "epoch": 0.5417060970663783, + "grad_norm": 1.6612569093704224, + "learning_rate": 1.3813410204044971e-05, + "loss": 0.2121734619140625, + "step": 8014 + }, + { + "epoch": 0.5417736920373124, + "grad_norm": 0.41879045963287354, + "learning_rate": 1.381012104264555e-05, + "loss": 0.08638763427734375, + "step": 8015 + }, + { + "epoch": 0.5418412870082466, + "grad_norm": 0.741407573223114, + "learning_rate": 1.3806831938819868e-05, + "loss": 0.15203857421875, + "step": 8016 + }, + { + "epoch": 0.5419088819791807, + "grad_norm": 0.47365832328796387, + "learning_rate": 1.3803542892727079e-05, + "loss": 0.09521484375, + "step": 8017 + }, + { + "epoch": 0.5419764769501149, + "grad_norm": 0.37035101652145386, + "learning_rate": 1.3800253904526329e-05, + "loss": 0.0874481201171875, + "step": 8018 + }, + { + "epoch": 0.5420440719210491, + "grad_norm": 0.31646403670310974, + "learning_rate": 1.379696497437675e-05, + "loss": 0.040927886962890625, + "step": 8019 + }, + { + "epoch": 0.5421116668919832, + "grad_norm": 0.782514214515686, + "learning_rate": 1.3793676102437489e-05, + "loss": 0.10840606689453125, + "step": 8020 + }, + { + "epoch": 0.5421792618629174, + "grad_norm": 0.3066462576389313, + "learning_rate": 1.3790387288867678e-05, + "loss": 0.06797027587890625, + "step": 8021 + }, + { + "epoch": 0.5422468568338515, + "grad_norm": 0.9876791834831238, + "learning_rate": 1.3787098533826459e-05, + "loss": 0.1804351806640625, + "step": 8022 + }, + { + "epoch": 0.5423144518047858, + "grad_norm": 0.9696218967437744, + "learning_rate": 1.3783809837472949e-05, + "loss": 0.231109619140625, + "step": 8023 + }, + { + "epoch": 0.5423820467757199, + "grad_norm": 0.7452170252799988, + "learning_rate": 1.3780521199966288e-05, + "loss": 0.1635284423828125, + "step": 8024 + }, + { + "epoch": 0.542449641746654, + "grad_norm": 0.7909448742866516, + "learning_rate": 1.3777232621465592e-05, + "loss": 0.13895416259765625, + "step": 8025 + }, + { + "epoch": 0.5425172367175882, + "grad_norm": 0.5065629482269287, + "learning_rate": 1.3773944102129985e-05, + "loss": 0.1056976318359375, + "step": 8026 + }, + { + "epoch": 0.5425848316885223, + "grad_norm": 0.7307884693145752, + "learning_rate": 1.3770655642118588e-05, + "loss": 0.20623779296875, + "step": 8027 + }, + { + "epoch": 0.5426524266594566, + "grad_norm": 0.6749990582466125, + "learning_rate": 1.376736724159052e-05, + "loss": 0.1404266357421875, + "step": 8028 + }, + { + "epoch": 0.5427200216303907, + "grad_norm": 0.7491754293441772, + "learning_rate": 1.3764078900704889e-05, + "loss": 0.175201416015625, + "step": 8029 + }, + { + "epoch": 0.5427876166013249, + "grad_norm": 1.178183674812317, + "learning_rate": 1.3760790619620803e-05, + "loss": 0.218017578125, + "step": 8030 + }, + { + "epoch": 0.542855211572259, + "grad_norm": 0.5334516763687134, + "learning_rate": 1.3757502398497379e-05, + "loss": 0.095611572265625, + "step": 8031 + }, + { + "epoch": 0.5429228065431931, + "grad_norm": 0.25239014625549316, + "learning_rate": 1.375421423749371e-05, + "loss": 0.04993438720703125, + "step": 8032 + }, + { + "epoch": 0.5429904015141274, + "grad_norm": 0.5059656500816345, + "learning_rate": 1.3750926136768906e-05, + "loss": 0.0932159423828125, + "step": 8033 + }, + { + "epoch": 0.5430579964850615, + "grad_norm": 0.32171082496643066, + "learning_rate": 1.3747638096482063e-05, + "loss": 0.05992889404296875, + "step": 8034 + }, + { + "epoch": 0.5431255914559957, + "grad_norm": 0.7351234555244446, + "learning_rate": 1.3744350116792282e-05, + "loss": 0.11113739013671875, + "step": 8035 + }, + { + "epoch": 0.5431931864269298, + "grad_norm": 0.6645640134811401, + "learning_rate": 1.3741062197858644e-05, + "loss": 0.1231536865234375, + "step": 8036 + }, + { + "epoch": 0.543260781397864, + "grad_norm": 0.7830576300621033, + "learning_rate": 1.3737774339840253e-05, + "loss": 0.12872314453125, + "step": 8037 + }, + { + "epoch": 0.5433283763687982, + "grad_norm": 0.7326522469520569, + "learning_rate": 1.373448654289618e-05, + "loss": 0.1785888671875, + "step": 8038 + }, + { + "epoch": 0.5433959713397323, + "grad_norm": 1.2750579118728638, + "learning_rate": 1.3731198807185527e-05, + "loss": 0.19451904296875, + "step": 8039 + }, + { + "epoch": 0.5434635663106665, + "grad_norm": 0.37481236457824707, + "learning_rate": 1.3727911132867368e-05, + "loss": 0.0521240234375, + "step": 8040 + }, + { + "epoch": 0.5435311612816006, + "grad_norm": 0.7382444143295288, + "learning_rate": 1.372462352010077e-05, + "loss": 0.1142730712890625, + "step": 8041 + }, + { + "epoch": 0.5435987562525348, + "grad_norm": 0.5342646241188049, + "learning_rate": 1.3721335969044824e-05, + "loss": 0.10446929931640625, + "step": 8042 + }, + { + "epoch": 0.543666351223469, + "grad_norm": 0.563037097454071, + "learning_rate": 1.371804847985859e-05, + "loss": 0.12200927734375, + "step": 8043 + }, + { + "epoch": 0.5437339461944032, + "grad_norm": 0.28319573402404785, + "learning_rate": 1.3714761052701151e-05, + "loss": 0.06504058837890625, + "step": 8044 + }, + { + "epoch": 0.5438015411653373, + "grad_norm": 0.29094257950782776, + "learning_rate": 1.3711473687731558e-05, + "loss": 0.05748748779296875, + "step": 8045 + }, + { + "epoch": 0.5438691361362714, + "grad_norm": 0.6331826448440552, + "learning_rate": 1.3708186385108885e-05, + "loss": 0.189697265625, + "step": 8046 + }, + { + "epoch": 0.5439367311072056, + "grad_norm": 0.6562678813934326, + "learning_rate": 1.3704899144992185e-05, + "loss": 0.145599365234375, + "step": 8047 + }, + { + "epoch": 0.5440043260781398, + "grad_norm": 1.011159062385559, + "learning_rate": 1.3701611967540524e-05, + "loss": 0.138031005859375, + "step": 8048 + }, + { + "epoch": 0.544071921049074, + "grad_norm": 0.5796054601669312, + "learning_rate": 1.3698324852912945e-05, + "loss": 0.11594390869140625, + "step": 8049 + }, + { + "epoch": 0.5441395160200081, + "grad_norm": 0.529175877571106, + "learning_rate": 1.3695037801268507e-05, + "loss": 0.095306396484375, + "step": 8050 + }, + { + "epoch": 0.5442071109909423, + "grad_norm": 0.6151182651519775, + "learning_rate": 1.3691750812766255e-05, + "loss": 0.113983154296875, + "step": 8051 + }, + { + "epoch": 0.5442747059618764, + "grad_norm": 1.1291686296463013, + "learning_rate": 1.3688463887565239e-05, + "loss": 0.192657470703125, + "step": 8052 + }, + { + "epoch": 0.5443423009328106, + "grad_norm": 0.44949114322662354, + "learning_rate": 1.3685177025824496e-05, + "loss": 0.099090576171875, + "step": 8053 + }, + { + "epoch": 0.5444098959037448, + "grad_norm": 1.0441043376922607, + "learning_rate": 1.368189022770306e-05, + "loss": 0.166412353515625, + "step": 8054 + }, + { + "epoch": 0.5444774908746789, + "grad_norm": 0.36203524470329285, + "learning_rate": 1.3678603493359974e-05, + "loss": 0.06980133056640625, + "step": 8055 + }, + { + "epoch": 0.5445450858456131, + "grad_norm": 0.1702326089143753, + "learning_rate": 1.3675316822954267e-05, + "loss": 0.03226470947265625, + "step": 8056 + }, + { + "epoch": 0.5446126808165472, + "grad_norm": 0.8674229979515076, + "learning_rate": 1.3672030216644976e-05, + "loss": 0.13878631591796875, + "step": 8057 + }, + { + "epoch": 0.5446802757874815, + "grad_norm": 0.5633774995803833, + "learning_rate": 1.3668743674591115e-05, + "loss": 0.1220245361328125, + "step": 8058 + }, + { + "epoch": 0.5447478707584156, + "grad_norm": 0.4140389859676361, + "learning_rate": 1.3665457196951717e-05, + "loss": 0.057708740234375, + "step": 8059 + }, + { + "epoch": 0.5448154657293497, + "grad_norm": 0.39824023842811584, + "learning_rate": 1.3662170783885796e-05, + "loss": 0.06510543823242188, + "step": 8060 + }, + { + "epoch": 0.5448830607002839, + "grad_norm": 0.7275193929672241, + "learning_rate": 1.365888443555238e-05, + "loss": 0.149261474609375, + "step": 8061 + }, + { + "epoch": 0.544950655671218, + "grad_norm": 0.768375813961029, + "learning_rate": 1.3655598152110468e-05, + "loss": 0.1510772705078125, + "step": 8062 + }, + { + "epoch": 0.5450182506421523, + "grad_norm": 0.5084482431411743, + "learning_rate": 1.365231193371908e-05, + "loss": 0.0994110107421875, + "step": 8063 + }, + { + "epoch": 0.5450858456130864, + "grad_norm": 1.2519041299819946, + "learning_rate": 1.3649025780537228e-05, + "loss": 0.19427490234375, + "step": 8064 + }, + { + "epoch": 0.5451534405840206, + "grad_norm": 0.5846375823020935, + "learning_rate": 1.3645739692723902e-05, + "loss": 0.11841583251953125, + "step": 8065 + }, + { + "epoch": 0.5452210355549547, + "grad_norm": 0.49747148156166077, + "learning_rate": 1.3642453670438114e-05, + "loss": 0.10649871826171875, + "step": 8066 + }, + { + "epoch": 0.5452886305258888, + "grad_norm": 1.5997974872589111, + "learning_rate": 1.3639167713838859e-05, + "loss": 0.20130157470703125, + "step": 8067 + }, + { + "epoch": 0.545356225496823, + "grad_norm": 0.38380563259124756, + "learning_rate": 1.3635881823085138e-05, + "loss": 0.0692901611328125, + "step": 8068 + }, + { + "epoch": 0.5454238204677572, + "grad_norm": 0.6070265173912048, + "learning_rate": 1.3632595998335932e-05, + "loss": 0.0864715576171875, + "step": 8069 + }, + { + "epoch": 0.5454914154386914, + "grad_norm": 0.46851882338523865, + "learning_rate": 1.362931023975024e-05, + "loss": 0.08270645141601562, + "step": 8070 + }, + { + "epoch": 0.5455590104096255, + "grad_norm": 0.9922953844070435, + "learning_rate": 1.362602454748704e-05, + "loss": 0.187774658203125, + "step": 8071 + }, + { + "epoch": 0.5456266053805597, + "grad_norm": 0.49050116539001465, + "learning_rate": 1.3622738921705317e-05, + "loss": 0.10053253173828125, + "step": 8072 + }, + { + "epoch": 0.5456942003514939, + "grad_norm": 0.718302845954895, + "learning_rate": 1.3619453362564048e-05, + "loss": 0.1676025390625, + "step": 8073 + }, + { + "epoch": 0.545761795322428, + "grad_norm": 0.7782896757125854, + "learning_rate": 1.3616167870222217e-05, + "loss": 0.142181396484375, + "step": 8074 + }, + { + "epoch": 0.5458293902933622, + "grad_norm": 0.593317449092865, + "learning_rate": 1.3612882444838787e-05, + "loss": 0.1479644775390625, + "step": 8075 + }, + { + "epoch": 0.5458969852642963, + "grad_norm": 0.7353683710098267, + "learning_rate": 1.360959708657273e-05, + "loss": 0.1597747802734375, + "step": 8076 + }, + { + "epoch": 0.5459645802352305, + "grad_norm": 0.4498155415058136, + "learning_rate": 1.3606311795583016e-05, + "loss": 0.099090576171875, + "step": 8077 + }, + { + "epoch": 0.5460321752061646, + "grad_norm": 0.6899306774139404, + "learning_rate": 1.36030265720286e-05, + "loss": 0.1245269775390625, + "step": 8078 + }, + { + "epoch": 0.5460997701770989, + "grad_norm": 1.2009071111679077, + "learning_rate": 1.3599741416068449e-05, + "loss": 0.1864013671875, + "step": 8079 + }, + { + "epoch": 0.546167365148033, + "grad_norm": 1.443332314491272, + "learning_rate": 1.3596456327861513e-05, + "loss": 0.11950492858886719, + "step": 8080 + }, + { + "epoch": 0.5462349601189671, + "grad_norm": 0.7422651052474976, + "learning_rate": 1.3593171307566755e-05, + "loss": 0.1607666015625, + "step": 8081 + }, + { + "epoch": 0.5463025550899013, + "grad_norm": 0.7376649975776672, + "learning_rate": 1.358988635534311e-05, + "loss": 0.1514129638671875, + "step": 8082 + }, + { + "epoch": 0.5463701500608354, + "grad_norm": 0.7804709672927856, + "learning_rate": 1.3586601471349541e-05, + "loss": 0.152008056640625, + "step": 8083 + }, + { + "epoch": 0.5464377450317697, + "grad_norm": 0.3078311085700989, + "learning_rate": 1.3583316655744976e-05, + "loss": 0.0546722412109375, + "step": 8084 + }, + { + "epoch": 0.5465053400027038, + "grad_norm": 0.6022128462791443, + "learning_rate": 1.3580031908688364e-05, + "loss": 0.1046142578125, + "step": 8085 + }, + { + "epoch": 0.546572934973638, + "grad_norm": 0.3757137358188629, + "learning_rate": 1.3576747230338635e-05, + "loss": 0.0682525634765625, + "step": 8086 + }, + { + "epoch": 0.5466405299445721, + "grad_norm": 0.5058135390281677, + "learning_rate": 1.3573462620854734e-05, + "loss": 0.110137939453125, + "step": 8087 + }, + { + "epoch": 0.5467081249155062, + "grad_norm": 0.8202840089797974, + "learning_rate": 1.3570178080395579e-05, + "loss": 0.123992919921875, + "step": 8088 + }, + { + "epoch": 0.5467757198864405, + "grad_norm": 0.3451019525527954, + "learning_rate": 1.3566893609120098e-05, + "loss": 0.0640716552734375, + "step": 8089 + }, + { + "epoch": 0.5468433148573746, + "grad_norm": 0.6613684892654419, + "learning_rate": 1.3563609207187221e-05, + "loss": 0.11733245849609375, + "step": 8090 + }, + { + "epoch": 0.5469109098283088, + "grad_norm": 0.6588454246520996, + "learning_rate": 1.356032487475586e-05, + "loss": 0.124908447265625, + "step": 8091 + }, + { + "epoch": 0.5469785047992429, + "grad_norm": 0.49057865142822266, + "learning_rate": 1.3557040611984937e-05, + "loss": 0.124176025390625, + "step": 8092 + }, + { + "epoch": 0.5470460997701772, + "grad_norm": 0.3718799352645874, + "learning_rate": 1.3553756419033357e-05, + "loss": 0.08116912841796875, + "step": 8093 + }, + { + "epoch": 0.5471136947411113, + "grad_norm": 1.1990572214126587, + "learning_rate": 1.3550472296060045e-05, + "loss": 0.17474365234375, + "step": 8094 + }, + { + "epoch": 0.5471812897120454, + "grad_norm": 0.5737170577049255, + "learning_rate": 1.3547188243223891e-05, + "loss": 0.1177825927734375, + "step": 8095 + }, + { + "epoch": 0.5472488846829796, + "grad_norm": 0.5704306364059448, + "learning_rate": 1.3543904260683807e-05, + "loss": 0.0975494384765625, + "step": 8096 + }, + { + "epoch": 0.5473164796539137, + "grad_norm": 0.40186819434165955, + "learning_rate": 1.3540620348598688e-05, + "loss": 0.06003570556640625, + "step": 8097 + }, + { + "epoch": 0.547384074624848, + "grad_norm": 0.467209130525589, + "learning_rate": 1.3537336507127438e-05, + "loss": 0.0839385986328125, + "step": 8098 + }, + { + "epoch": 0.5474516695957821, + "grad_norm": 0.7113438844680786, + "learning_rate": 1.3534052736428944e-05, + "loss": 0.1599273681640625, + "step": 8099 + }, + { + "epoch": 0.5475192645667163, + "grad_norm": 0.2551727592945099, + "learning_rate": 1.353076903666209e-05, + "loss": 0.042125701904296875, + "step": 8100 + }, + { + "epoch": 0.5475868595376504, + "grad_norm": 0.37834179401397705, + "learning_rate": 1.3527485407985768e-05, + "loss": 0.059337615966796875, + "step": 8101 + }, + { + "epoch": 0.5476544545085845, + "grad_norm": 0.4758448302745819, + "learning_rate": 1.3524201850558857e-05, + "loss": 0.1056060791015625, + "step": 8102 + }, + { + "epoch": 0.5477220494795187, + "grad_norm": 0.2960730195045471, + "learning_rate": 1.3520918364540247e-05, + "loss": 0.0564727783203125, + "step": 8103 + }, + { + "epoch": 0.5477896444504529, + "grad_norm": 0.48374292254447937, + "learning_rate": 1.3517634950088795e-05, + "loss": 0.09921646118164062, + "step": 8104 + }, + { + "epoch": 0.5478572394213871, + "grad_norm": 0.776900589466095, + "learning_rate": 1.3514351607363389e-05, + "loss": 0.1784210205078125, + "step": 8105 + }, + { + "epoch": 0.5479248343923212, + "grad_norm": 0.9605110287666321, + "learning_rate": 1.3511068336522886e-05, + "loss": 0.14990234375, + "step": 8106 + }, + { + "epoch": 0.5479924293632554, + "grad_norm": 0.5657411813735962, + "learning_rate": 1.3507785137726163e-05, + "loss": 0.08995819091796875, + "step": 8107 + }, + { + "epoch": 0.5480600243341895, + "grad_norm": 0.7718934416770935, + "learning_rate": 1.350450201113207e-05, + "loss": 0.160736083984375, + "step": 8108 + }, + { + "epoch": 0.5481276193051237, + "grad_norm": 0.21990206837654114, + "learning_rate": 1.3501218956899473e-05, + "loss": 0.03704833984375, + "step": 8109 + }, + { + "epoch": 0.5481952142760579, + "grad_norm": 1.699615478515625, + "learning_rate": 1.3497935975187225e-05, + "loss": 0.187286376953125, + "step": 8110 + }, + { + "epoch": 0.548262809246992, + "grad_norm": 0.9018757343292236, + "learning_rate": 1.3494653066154172e-05, + "loss": 0.07789230346679688, + "step": 8111 + }, + { + "epoch": 0.5483304042179262, + "grad_norm": 0.31231454014778137, + "learning_rate": 1.3491370229959168e-05, + "loss": 0.067840576171875, + "step": 8112 + }, + { + "epoch": 0.5483979991888603, + "grad_norm": 0.1946895271539688, + "learning_rate": 1.3488087466761051e-05, + "loss": 0.037750244140625, + "step": 8113 + }, + { + "epoch": 0.5484655941597945, + "grad_norm": 0.927772045135498, + "learning_rate": 1.348480477671867e-05, + "loss": 0.12511444091796875, + "step": 8114 + }, + { + "epoch": 0.5485331891307287, + "grad_norm": 0.581693172454834, + "learning_rate": 1.3481522159990852e-05, + "loss": 0.08416748046875, + "step": 8115 + }, + { + "epoch": 0.5486007841016628, + "grad_norm": 0.619495689868927, + "learning_rate": 1.3478239616736441e-05, + "loss": 0.08696746826171875, + "step": 8116 + }, + { + "epoch": 0.548668379072597, + "grad_norm": 0.37218719720840454, + "learning_rate": 1.3474957147114254e-05, + "loss": 0.06338882446289062, + "step": 8117 + }, + { + "epoch": 0.5487359740435311, + "grad_norm": 0.5010145902633667, + "learning_rate": 1.3471674751283127e-05, + "loss": 0.12567138671875, + "step": 8118 + }, + { + "epoch": 0.5488035690144654, + "grad_norm": 0.8344937562942505, + "learning_rate": 1.3468392429401878e-05, + "loss": 0.1555023193359375, + "step": 8119 + }, + { + "epoch": 0.5488711639853995, + "grad_norm": 0.6395991444587708, + "learning_rate": 1.3465110181629334e-05, + "loss": 0.08782958984375, + "step": 8120 + }, + { + "epoch": 0.5489387589563336, + "grad_norm": 0.8849732279777527, + "learning_rate": 1.34618280081243e-05, + "loss": 0.1565093994140625, + "step": 8121 + }, + { + "epoch": 0.5490063539272678, + "grad_norm": 1.009842038154602, + "learning_rate": 1.3458545909045596e-05, + "loss": 0.12586212158203125, + "step": 8122 + }, + { + "epoch": 0.5490739488982019, + "grad_norm": 0.1827985644340515, + "learning_rate": 1.3455263884552028e-05, + "loss": 0.03029632568359375, + "step": 8123 + }, + { + "epoch": 0.5491415438691362, + "grad_norm": 1.4654217958450317, + "learning_rate": 1.3451981934802395e-05, + "loss": 0.224334716796875, + "step": 8124 + }, + { + "epoch": 0.5492091388400703, + "grad_norm": 0.31279587745666504, + "learning_rate": 1.3448700059955507e-05, + "loss": 0.06440353393554688, + "step": 8125 + }, + { + "epoch": 0.5492767338110045, + "grad_norm": 1.2813143730163574, + "learning_rate": 1.3445418260170152e-05, + "loss": 0.1337890625, + "step": 8126 + }, + { + "epoch": 0.5493443287819386, + "grad_norm": 0.6571950912475586, + "learning_rate": 1.3442136535605138e-05, + "loss": 0.1370086669921875, + "step": 8127 + }, + { + "epoch": 0.5494119237528727, + "grad_norm": 0.3903385400772095, + "learning_rate": 1.343885488641924e-05, + "loss": 0.0806427001953125, + "step": 8128 + }, + { + "epoch": 0.549479518723807, + "grad_norm": 0.4862575829029083, + "learning_rate": 1.3435573312771254e-05, + "loss": 0.07226371765136719, + "step": 8129 + }, + { + "epoch": 0.5495471136947411, + "grad_norm": 1.0085794925689697, + "learning_rate": 1.3432291814819958e-05, + "loss": 0.202911376953125, + "step": 8130 + }, + { + "epoch": 0.5496147086656753, + "grad_norm": 0.8919947147369385, + "learning_rate": 1.3429010392724134e-05, + "loss": 0.123199462890625, + "step": 8131 + }, + { + "epoch": 0.5496823036366094, + "grad_norm": 0.6666011810302734, + "learning_rate": 1.3425729046642557e-05, + "loss": 0.117034912109375, + "step": 8132 + }, + { + "epoch": 0.5497498986075436, + "grad_norm": 0.5734286308288574, + "learning_rate": 1.3422447776734004e-05, + "loss": 0.124176025390625, + "step": 8133 + }, + { + "epoch": 0.5498174935784778, + "grad_norm": 0.15969838201999664, + "learning_rate": 1.3419166583157235e-05, + "loss": 0.024555206298828125, + "step": 8134 + }, + { + "epoch": 0.5498850885494119, + "grad_norm": 0.28203368186950684, + "learning_rate": 1.3415885466071015e-05, + "loss": 0.0548858642578125, + "step": 8135 + }, + { + "epoch": 0.5499526835203461, + "grad_norm": 0.7091348171234131, + "learning_rate": 1.3412604425634112e-05, + "loss": 0.11406707763671875, + "step": 8136 + }, + { + "epoch": 0.5500202784912802, + "grad_norm": 0.5352673530578613, + "learning_rate": 1.3409323462005275e-05, + "loss": 0.10150146484375, + "step": 8137 + }, + { + "epoch": 0.5500878734622144, + "grad_norm": 0.9597818851470947, + "learning_rate": 1.3406042575343262e-05, + "loss": 0.193634033203125, + "step": 8138 + }, + { + "epoch": 0.5501554684331486, + "grad_norm": 0.20870421826839447, + "learning_rate": 1.340276176580682e-05, + "loss": 0.034687042236328125, + "step": 8139 + }, + { + "epoch": 0.5502230634040828, + "grad_norm": 0.22059766948223114, + "learning_rate": 1.3399481033554704e-05, + "loss": 0.032421112060546875, + "step": 8140 + }, + { + "epoch": 0.5502906583750169, + "grad_norm": 0.4884835481643677, + "learning_rate": 1.3396200378745641e-05, + "loss": 0.08350372314453125, + "step": 8141 + }, + { + "epoch": 0.550358253345951, + "grad_norm": 0.2802291512489319, + "learning_rate": 1.3392919801538383e-05, + "loss": 0.05340576171875, + "step": 8142 + }, + { + "epoch": 0.5504258483168852, + "grad_norm": 0.4786999225616455, + "learning_rate": 1.3389639302091654e-05, + "loss": 0.1175537109375, + "step": 8143 + }, + { + "epoch": 0.5504934432878194, + "grad_norm": 0.9231967329978943, + "learning_rate": 1.3386358880564198e-05, + "loss": 0.165924072265625, + "step": 8144 + }, + { + "epoch": 0.5505610382587536, + "grad_norm": 0.8025830388069153, + "learning_rate": 1.3383078537114734e-05, + "loss": 0.10209083557128906, + "step": 8145 + }, + { + "epoch": 0.5506286332296877, + "grad_norm": 0.760018527507782, + "learning_rate": 1.337979827190198e-05, + "loss": 0.18695068359375, + "step": 8146 + }, + { + "epoch": 0.5506962282006219, + "grad_norm": 0.6848710179328918, + "learning_rate": 1.3376518085084664e-05, + "loss": 0.1076812744140625, + "step": 8147 + }, + { + "epoch": 0.550763823171556, + "grad_norm": 0.32571130990982056, + "learning_rate": 1.3373237976821497e-05, + "loss": 0.036773681640625, + "step": 8148 + }, + { + "epoch": 0.5508314181424901, + "grad_norm": 0.38651043176651, + "learning_rate": 1.3369957947271199e-05, + "loss": 0.0655059814453125, + "step": 8149 + }, + { + "epoch": 0.5508990131134244, + "grad_norm": 1.4748345613479614, + "learning_rate": 1.3366677996592466e-05, + "loss": 0.136749267578125, + "step": 8150 + }, + { + "epoch": 0.5509666080843585, + "grad_norm": 1.3996919393539429, + "learning_rate": 1.3363398124944013e-05, + "loss": 0.20111083984375, + "step": 8151 + }, + { + "epoch": 0.5510342030552927, + "grad_norm": 0.7800412178039551, + "learning_rate": 1.3360118332484532e-05, + "loss": 0.1254730224609375, + "step": 8152 + }, + { + "epoch": 0.5511017980262268, + "grad_norm": 0.49444156885147095, + "learning_rate": 1.3356838619372734e-05, + "loss": 0.123870849609375, + "step": 8153 + }, + { + "epoch": 0.5511693929971611, + "grad_norm": 1.0664010047912598, + "learning_rate": 1.3353558985767292e-05, + "loss": 0.164520263671875, + "step": 8154 + }, + { + "epoch": 0.5512369879680952, + "grad_norm": 1.1257601976394653, + "learning_rate": 1.3350279431826911e-05, + "loss": 0.160919189453125, + "step": 8155 + }, + { + "epoch": 0.5513045829390293, + "grad_norm": 0.4351130425930023, + "learning_rate": 1.3346999957710267e-05, + "loss": 0.0714263916015625, + "step": 8156 + }, + { + "epoch": 0.5513721779099635, + "grad_norm": 0.6060203313827515, + "learning_rate": 1.3343720563576052e-05, + "loss": 0.11997032165527344, + "step": 8157 + }, + { + "epoch": 0.5514397728808976, + "grad_norm": 0.24880453944206238, + "learning_rate": 1.3340441249582933e-05, + "loss": 0.04421234130859375, + "step": 8158 + }, + { + "epoch": 0.5515073678518319, + "grad_norm": 0.5070733428001404, + "learning_rate": 1.3337162015889582e-05, + "loss": 0.1103363037109375, + "step": 8159 + }, + { + "epoch": 0.551574962822766, + "grad_norm": 0.5688675045967102, + "learning_rate": 1.3333882862654681e-05, + "loss": 0.1009979248046875, + "step": 8160 + }, + { + "epoch": 0.5516425577937002, + "grad_norm": 0.5105774402618408, + "learning_rate": 1.3330603790036884e-05, + "loss": 0.1103363037109375, + "step": 8161 + }, + { + "epoch": 0.5517101527646343, + "grad_norm": 1.6704052686691284, + "learning_rate": 1.3327324798194861e-05, + "loss": 0.26080322265625, + "step": 8162 + }, + { + "epoch": 0.5517777477355684, + "grad_norm": 0.49740439653396606, + "learning_rate": 1.3324045887287261e-05, + "loss": 0.1202239990234375, + "step": 8163 + }, + { + "epoch": 0.5518453427065027, + "grad_norm": 0.9751433730125427, + "learning_rate": 1.3320767057472748e-05, + "loss": 0.2357177734375, + "step": 8164 + }, + { + "epoch": 0.5519129376774368, + "grad_norm": 1.12391996383667, + "learning_rate": 1.3317488308909964e-05, + "loss": 0.231658935546875, + "step": 8165 + }, + { + "epoch": 0.551980532648371, + "grad_norm": 0.50259929895401, + "learning_rate": 1.3314209641757567e-05, + "loss": 0.12689208984375, + "step": 8166 + }, + { + "epoch": 0.5520481276193051, + "grad_norm": 0.45191696286201477, + "learning_rate": 1.3310931056174183e-05, + "loss": 0.05869293212890625, + "step": 8167 + }, + { + "epoch": 0.5521157225902393, + "grad_norm": 0.5908268094062805, + "learning_rate": 1.3307652552318462e-05, + "loss": 0.1040802001953125, + "step": 8168 + }, + { + "epoch": 0.5521833175611734, + "grad_norm": 0.914607584476471, + "learning_rate": 1.3304374130349038e-05, + "loss": 0.211151123046875, + "step": 8169 + }, + { + "epoch": 0.5522509125321076, + "grad_norm": 0.6882937550544739, + "learning_rate": 1.3301095790424533e-05, + "loss": 0.15234375, + "step": 8170 + }, + { + "epoch": 0.5523185075030418, + "grad_norm": 0.8184521198272705, + "learning_rate": 1.3297817532703582e-05, + "loss": 0.14197540283203125, + "step": 8171 + }, + { + "epoch": 0.5523861024739759, + "grad_norm": 0.4949752688407898, + "learning_rate": 1.32945393573448e-05, + "loss": 0.10760498046875, + "step": 8172 + }, + { + "epoch": 0.5524536974449101, + "grad_norm": 1.0252437591552734, + "learning_rate": 1.329126126450682e-05, + "loss": 0.222808837890625, + "step": 8173 + }, + { + "epoch": 0.5525212924158442, + "grad_norm": 0.9986220002174377, + "learning_rate": 1.3287983254348237e-05, + "loss": 0.13452911376953125, + "step": 8174 + }, + { + "epoch": 0.5525888873867785, + "grad_norm": 0.9781280159950256, + "learning_rate": 1.328470532702768e-05, + "loss": 0.21160888671875, + "step": 8175 + }, + { + "epoch": 0.5526564823577126, + "grad_norm": 0.8423800468444824, + "learning_rate": 1.3281427482703736e-05, + "loss": 0.131134033203125, + "step": 8176 + }, + { + "epoch": 0.5527240773286467, + "grad_norm": 0.27241072058677673, + "learning_rate": 1.3278149721535024e-05, + "loss": 0.035884857177734375, + "step": 8177 + }, + { + "epoch": 0.5527916722995809, + "grad_norm": 0.6671280860900879, + "learning_rate": 1.3274872043680134e-05, + "loss": 0.12273025512695312, + "step": 8178 + }, + { + "epoch": 0.552859267270515, + "grad_norm": 1.1772313117980957, + "learning_rate": 1.3271594449297668e-05, + "loss": 0.17913818359375, + "step": 8179 + }, + { + "epoch": 0.5529268622414493, + "grad_norm": 0.2840268015861511, + "learning_rate": 1.326831693854621e-05, + "loss": 0.03823089599609375, + "step": 8180 + }, + { + "epoch": 0.5529944572123834, + "grad_norm": 0.49013790488243103, + "learning_rate": 1.3265039511584343e-05, + "loss": 0.082733154296875, + "step": 8181 + }, + { + "epoch": 0.5530620521833176, + "grad_norm": 0.2107897698879242, + "learning_rate": 1.3261762168570661e-05, + "loss": 0.03860282897949219, + "step": 8182 + }, + { + "epoch": 0.5531296471542517, + "grad_norm": 0.281484991312027, + "learning_rate": 1.3258484909663729e-05, + "loss": 0.041072845458984375, + "step": 8183 + }, + { + "epoch": 0.5531972421251858, + "grad_norm": 0.7741205096244812, + "learning_rate": 1.325520773502213e-05, + "loss": 0.11760139465332031, + "step": 8184 + }, + { + "epoch": 0.5532648370961201, + "grad_norm": 0.7752918601036072, + "learning_rate": 1.3251930644804429e-05, + "loss": 0.1364288330078125, + "step": 8185 + }, + { + "epoch": 0.5533324320670542, + "grad_norm": 0.7820404767990112, + "learning_rate": 1.32486536391692e-05, + "loss": 0.1273040771484375, + "step": 8186 + }, + { + "epoch": 0.5534000270379884, + "grad_norm": 0.188389390707016, + "learning_rate": 1.3245376718274994e-05, + "loss": 0.0371856689453125, + "step": 8187 + }, + { + "epoch": 0.5534676220089225, + "grad_norm": 0.5056734085083008, + "learning_rate": 1.3242099882280376e-05, + "loss": 0.09991455078125, + "step": 8188 + }, + { + "epoch": 0.5535352169798567, + "grad_norm": 0.2790784239768982, + "learning_rate": 1.3238823131343897e-05, + "loss": 0.06067657470703125, + "step": 8189 + }, + { + "epoch": 0.5536028119507909, + "grad_norm": 0.778904139995575, + "learning_rate": 1.3235546465624111e-05, + "loss": 0.11105728149414062, + "step": 8190 + }, + { + "epoch": 0.553670406921725, + "grad_norm": 0.9179699420928955, + "learning_rate": 1.3232269885279557e-05, + "loss": 0.21075439453125, + "step": 8191 + }, + { + "epoch": 0.5537380018926592, + "grad_norm": 0.36533331871032715, + "learning_rate": 1.3228993390468783e-05, + "loss": 0.0594635009765625, + "step": 8192 + }, + { + "epoch": 0.5538055968635933, + "grad_norm": 0.8052116632461548, + "learning_rate": 1.3225716981350321e-05, + "loss": 0.1679229736328125, + "step": 8193 + }, + { + "epoch": 0.5538731918345275, + "grad_norm": 0.3132057189941406, + "learning_rate": 1.32224406580827e-05, + "loss": 0.05352020263671875, + "step": 8194 + }, + { + "epoch": 0.5539407868054617, + "grad_norm": 0.3243230879306793, + "learning_rate": 1.3219164420824464e-05, + "loss": 0.06417083740234375, + "step": 8195 + }, + { + "epoch": 0.5540083817763959, + "grad_norm": 0.28074556589126587, + "learning_rate": 1.321588826973412e-05, + "loss": 0.05355262756347656, + "step": 8196 + }, + { + "epoch": 0.55407597674733, + "grad_norm": 0.256981760263443, + "learning_rate": 1.32126122049702e-05, + "loss": 0.042461395263671875, + "step": 8197 + }, + { + "epoch": 0.5541435717182641, + "grad_norm": 0.5247489213943481, + "learning_rate": 1.3209336226691217e-05, + "loss": 0.127166748046875, + "step": 8198 + }, + { + "epoch": 0.5542111666891983, + "grad_norm": 0.29277220368385315, + "learning_rate": 1.3206060335055686e-05, + "loss": 0.04349517822265625, + "step": 8199 + }, + { + "epoch": 0.5542787616601325, + "grad_norm": 0.3435245156288147, + "learning_rate": 1.3202784530222106e-05, + "loss": 0.06640625, + "step": 8200 + }, + { + "epoch": 0.5543463566310667, + "grad_norm": 0.6424391865730286, + "learning_rate": 1.3199508812348993e-05, + "loss": 0.10454940795898438, + "step": 8201 + }, + { + "epoch": 0.5544139516020008, + "grad_norm": 1.2568943500518799, + "learning_rate": 1.3196233181594836e-05, + "loss": 0.170196533203125, + "step": 8202 + }, + { + "epoch": 0.554481546572935, + "grad_norm": 1.2682503461837769, + "learning_rate": 1.3192957638118142e-05, + "loss": 0.1661224365234375, + "step": 8203 + }, + { + "epoch": 0.5545491415438691, + "grad_norm": 0.386251837015152, + "learning_rate": 1.3189682182077392e-05, + "loss": 0.073333740234375, + "step": 8204 + }, + { + "epoch": 0.5546167365148033, + "grad_norm": 0.6580986380577087, + "learning_rate": 1.3186406813631071e-05, + "loss": 0.1193084716796875, + "step": 8205 + }, + { + "epoch": 0.5546843314857375, + "grad_norm": 0.7648612260818481, + "learning_rate": 1.3183131532937678e-05, + "loss": 0.12813568115234375, + "step": 8206 + }, + { + "epoch": 0.5547519264566716, + "grad_norm": 0.38530492782592773, + "learning_rate": 1.317985634015567e-05, + "loss": 0.062774658203125, + "step": 8207 + }, + { + "epoch": 0.5548195214276058, + "grad_norm": 0.24822619557380676, + "learning_rate": 1.317658123544354e-05, + "loss": 0.048778533935546875, + "step": 8208 + }, + { + "epoch": 0.5548871163985399, + "grad_norm": 0.3427797853946686, + "learning_rate": 1.3173306218959743e-05, + "loss": 0.04632568359375, + "step": 8209 + }, + { + "epoch": 0.5549547113694742, + "grad_norm": 0.4842718243598938, + "learning_rate": 1.3170031290862753e-05, + "loss": 0.115447998046875, + "step": 8210 + }, + { + "epoch": 0.5550223063404083, + "grad_norm": 0.44202229380607605, + "learning_rate": 1.3166756451311028e-05, + "loss": 0.062137603759765625, + "step": 8211 + }, + { + "epoch": 0.5550899013113424, + "grad_norm": 0.2975333631038666, + "learning_rate": 1.3163481700463032e-05, + "loss": 0.05251121520996094, + "step": 8212 + }, + { + "epoch": 0.5551574962822766, + "grad_norm": 0.3721979558467865, + "learning_rate": 1.3160207038477207e-05, + "loss": 0.06636428833007812, + "step": 8213 + }, + { + "epoch": 0.5552250912532107, + "grad_norm": 1.214473009109497, + "learning_rate": 1.315693246551201e-05, + "loss": 0.1500701904296875, + "step": 8214 + }, + { + "epoch": 0.555292686224145, + "grad_norm": 0.43647870421409607, + "learning_rate": 1.3153657981725885e-05, + "loss": 0.07537078857421875, + "step": 8215 + }, + { + "epoch": 0.5553602811950791, + "grad_norm": 0.5323039889335632, + "learning_rate": 1.3150383587277262e-05, + "loss": 0.110595703125, + "step": 8216 + }, + { + "epoch": 0.5554278761660133, + "grad_norm": 0.346144437789917, + "learning_rate": 1.3147109282324588e-05, + "loss": 0.06993484497070312, + "step": 8217 + }, + { + "epoch": 0.5554954711369474, + "grad_norm": 1.1429321765899658, + "learning_rate": 1.3143835067026286e-05, + "loss": 0.169036865234375, + "step": 8218 + }, + { + "epoch": 0.5555630661078815, + "grad_norm": 0.9318029284477234, + "learning_rate": 1.3140560941540795e-05, + "loss": 0.18572998046875, + "step": 8219 + }, + { + "epoch": 0.5556306610788158, + "grad_norm": 1.2615431547164917, + "learning_rate": 1.3137286906026522e-05, + "loss": 0.162384033203125, + "step": 8220 + }, + { + "epoch": 0.5556982560497499, + "grad_norm": 0.8452584743499756, + "learning_rate": 1.31340129606419e-05, + "loss": 0.11060523986816406, + "step": 8221 + }, + { + "epoch": 0.5557658510206841, + "grad_norm": 0.3719280958175659, + "learning_rate": 1.3130739105545326e-05, + "loss": 0.05109405517578125, + "step": 8222 + }, + { + "epoch": 0.5558334459916182, + "grad_norm": 0.38027456402778625, + "learning_rate": 1.3127465340895223e-05, + "loss": 0.05381011962890625, + "step": 8223 + }, + { + "epoch": 0.5559010409625524, + "grad_norm": 1.132063865661621, + "learning_rate": 1.3124191666849992e-05, + "loss": 0.159698486328125, + "step": 8224 + }, + { + "epoch": 0.5559686359334866, + "grad_norm": 0.9084042310714722, + "learning_rate": 1.3120918083568036e-05, + "loss": 0.1215667724609375, + "step": 8225 + }, + { + "epoch": 0.5560362309044207, + "grad_norm": 1.358941674232483, + "learning_rate": 1.3117644591207745e-05, + "loss": 0.21649169921875, + "step": 8226 + }, + { + "epoch": 0.5561038258753549, + "grad_norm": 0.39244744181632996, + "learning_rate": 1.3114371189927516e-05, + "loss": 0.06646728515625, + "step": 8227 + }, + { + "epoch": 0.556171420846289, + "grad_norm": 0.5228601098060608, + "learning_rate": 1.3111097879885743e-05, + "loss": 0.110504150390625, + "step": 8228 + }, + { + "epoch": 0.5562390158172232, + "grad_norm": 0.5958653092384338, + "learning_rate": 1.310782466124079e-05, + "loss": 0.101837158203125, + "step": 8229 + }, + { + "epoch": 0.5563066107881574, + "grad_norm": 1.0506513118743896, + "learning_rate": 1.3104551534151053e-05, + "loss": 0.228515625, + "step": 8230 + }, + { + "epoch": 0.5563742057590916, + "grad_norm": 0.2768450081348419, + "learning_rate": 1.3101278498774898e-05, + "loss": 0.06829071044921875, + "step": 8231 + }, + { + "epoch": 0.5564418007300257, + "grad_norm": 0.48644787073135376, + "learning_rate": 1.3098005555270701e-05, + "loss": 0.105194091796875, + "step": 8232 + }, + { + "epoch": 0.5565093957009598, + "grad_norm": 0.2666798233985901, + "learning_rate": 1.3094732703796819e-05, + "loss": 0.057804107666015625, + "step": 8233 + }, + { + "epoch": 0.556576990671894, + "grad_norm": 0.723308265209198, + "learning_rate": 1.3091459944511621e-05, + "loss": 0.11109542846679688, + "step": 8234 + }, + { + "epoch": 0.5566445856428281, + "grad_norm": 0.262847363948822, + "learning_rate": 1.3088187277573455e-05, + "loss": 0.04888725280761719, + "step": 8235 + }, + { + "epoch": 0.5567121806137624, + "grad_norm": 0.9561873078346252, + "learning_rate": 1.3084914703140685e-05, + "loss": 0.16632843017578125, + "step": 8236 + }, + { + "epoch": 0.5567797755846965, + "grad_norm": 0.7031109929084778, + "learning_rate": 1.3081642221371646e-05, + "loss": 0.147674560546875, + "step": 8237 + }, + { + "epoch": 0.5568473705556307, + "grad_norm": 0.6055300235748291, + "learning_rate": 1.3078369832424694e-05, + "loss": 0.151580810546875, + "step": 8238 + }, + { + "epoch": 0.5569149655265648, + "grad_norm": 0.26707249879837036, + "learning_rate": 1.3075097536458154e-05, + "loss": 0.04248046875, + "step": 8239 + }, + { + "epoch": 0.556982560497499, + "grad_norm": 0.3334835171699524, + "learning_rate": 1.3071825333630365e-05, + "loss": 0.057464599609375, + "step": 8240 + }, + { + "epoch": 0.5570501554684332, + "grad_norm": 0.7515231966972351, + "learning_rate": 1.3068553224099663e-05, + "loss": 0.240020751953125, + "step": 8241 + }, + { + "epoch": 0.5571177504393673, + "grad_norm": 0.39955031871795654, + "learning_rate": 1.306528120802436e-05, + "loss": 0.08522796630859375, + "step": 8242 + }, + { + "epoch": 0.5571853454103015, + "grad_norm": 0.5092965364456177, + "learning_rate": 1.306200928556279e-05, + "loss": 0.0854644775390625, + "step": 8243 + }, + { + "epoch": 0.5572529403812356, + "grad_norm": 0.35980239510536194, + "learning_rate": 1.3058737456873257e-05, + "loss": 0.0756988525390625, + "step": 8244 + }, + { + "epoch": 0.5573205353521697, + "grad_norm": 1.02198326587677, + "learning_rate": 1.3055465722114087e-05, + "loss": 0.1573028564453125, + "step": 8245 + }, + { + "epoch": 0.557388130323104, + "grad_norm": 0.49552759528160095, + "learning_rate": 1.3052194081443572e-05, + "loss": 0.07929611206054688, + "step": 8246 + }, + { + "epoch": 0.5574557252940381, + "grad_norm": 1.5641529560089111, + "learning_rate": 1.3048922535020021e-05, + "loss": 0.210296630859375, + "step": 8247 + }, + { + "epoch": 0.5575233202649723, + "grad_norm": 0.3131176233291626, + "learning_rate": 1.304565108300173e-05, + "loss": 0.058765411376953125, + "step": 8248 + }, + { + "epoch": 0.5575909152359064, + "grad_norm": 0.643895149230957, + "learning_rate": 1.3042379725547001e-05, + "loss": 0.113128662109375, + "step": 8249 + }, + { + "epoch": 0.5576585102068407, + "grad_norm": 0.5274330973625183, + "learning_rate": 1.3039108462814107e-05, + "loss": 0.10810089111328125, + "step": 8250 + }, + { + "epoch": 0.5577261051777748, + "grad_norm": 0.307550847530365, + "learning_rate": 1.3035837294961344e-05, + "loss": 0.038631439208984375, + "step": 8251 + }, + { + "epoch": 0.5577937001487089, + "grad_norm": 0.5132392644882202, + "learning_rate": 1.303256622214699e-05, + "loss": 0.1229400634765625, + "step": 8252 + }, + { + "epoch": 0.5578612951196431, + "grad_norm": 0.24605271220207214, + "learning_rate": 1.3029295244529312e-05, + "loss": 0.04907989501953125, + "step": 8253 + }, + { + "epoch": 0.5579288900905772, + "grad_norm": 0.7038127779960632, + "learning_rate": 1.3026024362266594e-05, + "loss": 0.0843658447265625, + "step": 8254 + }, + { + "epoch": 0.5579964850615114, + "grad_norm": 0.716160774230957, + "learning_rate": 1.3022753575517085e-05, + "loss": 0.0926513671875, + "step": 8255 + }, + { + "epoch": 0.5580640800324456, + "grad_norm": 1.7679787874221802, + "learning_rate": 1.3019482884439058e-05, + "loss": 0.20550537109375, + "step": 8256 + }, + { + "epoch": 0.5581316750033798, + "grad_norm": 0.8591175079345703, + "learning_rate": 1.3016212289190762e-05, + "loss": 0.12265777587890625, + "step": 8257 + }, + { + "epoch": 0.5581992699743139, + "grad_norm": 0.5489758849143982, + "learning_rate": 1.3012941789930457e-05, + "loss": 0.07852554321289062, + "step": 8258 + }, + { + "epoch": 0.558266864945248, + "grad_norm": 0.9240589737892151, + "learning_rate": 1.3009671386816383e-05, + "loss": 0.158203125, + "step": 8259 + }, + { + "epoch": 0.5583344599161822, + "grad_norm": 1.0687875747680664, + "learning_rate": 1.3006401080006787e-05, + "loss": 0.213226318359375, + "step": 8260 + }, + { + "epoch": 0.5584020548871164, + "grad_norm": 0.7721866369247437, + "learning_rate": 1.30031308696599e-05, + "loss": 0.200927734375, + "step": 8261 + }, + { + "epoch": 0.5584696498580506, + "grad_norm": 0.28460606932640076, + "learning_rate": 1.2999860755933969e-05, + "loss": 0.041492462158203125, + "step": 8262 + }, + { + "epoch": 0.5585372448289847, + "grad_norm": 1.4333912134170532, + "learning_rate": 1.2996590738987208e-05, + "loss": 0.1521453857421875, + "step": 8263 + }, + { + "epoch": 0.5586048397999189, + "grad_norm": 0.376129686832428, + "learning_rate": 1.2993320818977845e-05, + "loss": 0.05507659912109375, + "step": 8264 + }, + { + "epoch": 0.558672434770853, + "grad_norm": 0.9662097692489624, + "learning_rate": 1.2990050996064104e-05, + "loss": 0.1589813232421875, + "step": 8265 + }, + { + "epoch": 0.5587400297417872, + "grad_norm": 0.35089728236198425, + "learning_rate": 1.298678127040419e-05, + "loss": 0.058757781982421875, + "step": 8266 + }, + { + "epoch": 0.5588076247127214, + "grad_norm": 0.24583610892295837, + "learning_rate": 1.2983511642156327e-05, + "loss": 0.05594635009765625, + "step": 8267 + }, + { + "epoch": 0.5588752196836555, + "grad_norm": 0.406444787979126, + "learning_rate": 1.2980242111478702e-05, + "loss": 0.0878753662109375, + "step": 8268 + }, + { + "epoch": 0.5589428146545897, + "grad_norm": 0.7462129592895508, + "learning_rate": 1.2976972678529527e-05, + "loss": 0.1262054443359375, + "step": 8269 + }, + { + "epoch": 0.5590104096255238, + "grad_norm": 0.16205789148807526, + "learning_rate": 1.2973703343466995e-05, + "loss": 0.028961181640625, + "step": 8270 + }, + { + "epoch": 0.5590780045964581, + "grad_norm": 0.4143817722797394, + "learning_rate": 1.29704341064493e-05, + "loss": 0.06779861450195312, + "step": 8271 + }, + { + "epoch": 0.5591455995673922, + "grad_norm": 0.33822953701019287, + "learning_rate": 1.296716496763462e-05, + "loss": 0.060863494873046875, + "step": 8272 + }, + { + "epoch": 0.5592131945383263, + "grad_norm": 0.9800227284431458, + "learning_rate": 1.2963895927181142e-05, + "loss": 0.16802978515625, + "step": 8273 + }, + { + "epoch": 0.5592807895092605, + "grad_norm": 0.5482617616653442, + "learning_rate": 1.2960626985247047e-05, + "loss": 0.11642074584960938, + "step": 8274 + }, + { + "epoch": 0.5593483844801946, + "grad_norm": 0.845132052898407, + "learning_rate": 1.2957358141990494e-05, + "loss": 0.193267822265625, + "step": 8275 + }, + { + "epoch": 0.5594159794511289, + "grad_norm": 0.9818940162658691, + "learning_rate": 1.2954089397569659e-05, + "loss": 0.207763671875, + "step": 8276 + }, + { + "epoch": 0.559483574422063, + "grad_norm": 0.28072792291641235, + "learning_rate": 1.2950820752142702e-05, + "loss": 0.038867950439453125, + "step": 8277 + }, + { + "epoch": 0.5595511693929972, + "grad_norm": 0.5180978178977966, + "learning_rate": 1.2947552205867785e-05, + "loss": 0.1005401611328125, + "step": 8278 + }, + { + "epoch": 0.5596187643639313, + "grad_norm": 0.2609180212020874, + "learning_rate": 1.294428375890305e-05, + "loss": 0.04694366455078125, + "step": 8279 + }, + { + "epoch": 0.5596863593348654, + "grad_norm": 0.5345711708068848, + "learning_rate": 1.2941015411406655e-05, + "loss": 0.06420135498046875, + "step": 8280 + }, + { + "epoch": 0.5597539543057997, + "grad_norm": 0.29607954621315, + "learning_rate": 1.2937747163536736e-05, + "loss": 0.0635223388671875, + "step": 8281 + }, + { + "epoch": 0.5598215492767338, + "grad_norm": 2.1061935424804688, + "learning_rate": 1.293447901545144e-05, + "loss": 0.231964111328125, + "step": 8282 + }, + { + "epoch": 0.559889144247668, + "grad_norm": 0.31178605556488037, + "learning_rate": 1.2931210967308888e-05, + "loss": 0.04239845275878906, + "step": 8283 + }, + { + "epoch": 0.5599567392186021, + "grad_norm": 0.7975922226905823, + "learning_rate": 1.2927943019267223e-05, + "loss": 0.1676788330078125, + "step": 8284 + }, + { + "epoch": 0.5600243341895363, + "grad_norm": 0.5311794877052307, + "learning_rate": 1.2924675171484551e-05, + "loss": 0.106170654296875, + "step": 8285 + }, + { + "epoch": 0.5600919291604705, + "grad_norm": 1.582862138748169, + "learning_rate": 1.2921407424119008e-05, + "loss": 0.26715087890625, + "step": 8286 + }, + { + "epoch": 0.5601595241314046, + "grad_norm": 0.6475286483764648, + "learning_rate": 1.2918139777328701e-05, + "loss": 0.12005615234375, + "step": 8287 + }, + { + "epoch": 0.5602271191023388, + "grad_norm": 0.665372908115387, + "learning_rate": 1.2914872231271732e-05, + "loss": 0.11661529541015625, + "step": 8288 + }, + { + "epoch": 0.5602947140732729, + "grad_norm": 0.5132036209106445, + "learning_rate": 1.2911604786106216e-05, + "loss": 0.08905029296875, + "step": 8289 + }, + { + "epoch": 0.5603623090442071, + "grad_norm": 0.41901659965515137, + "learning_rate": 1.2908337441990245e-05, + "loss": 0.0799560546875, + "step": 8290 + }, + { + "epoch": 0.5604299040151413, + "grad_norm": 0.8601100444793701, + "learning_rate": 1.2905070199081922e-05, + "loss": 0.141693115234375, + "step": 8291 + }, + { + "epoch": 0.5604974989860755, + "grad_norm": 0.780083417892456, + "learning_rate": 1.2901803057539325e-05, + "loss": 0.103912353515625, + "step": 8292 + }, + { + "epoch": 0.5605650939570096, + "grad_norm": 1.9966368675231934, + "learning_rate": 1.2898536017520548e-05, + "loss": 0.17515945434570312, + "step": 8293 + }, + { + "epoch": 0.5606326889279437, + "grad_norm": 0.46318939328193665, + "learning_rate": 1.2895269079183664e-05, + "loss": 0.0970001220703125, + "step": 8294 + }, + { + "epoch": 0.5607002838988779, + "grad_norm": 0.8312498331069946, + "learning_rate": 1.2892002242686756e-05, + "loss": 0.168182373046875, + "step": 8295 + }, + { + "epoch": 0.560767878869812, + "grad_norm": 0.361690491437912, + "learning_rate": 1.2888735508187884e-05, + "loss": 0.0720062255859375, + "step": 8296 + }, + { + "epoch": 0.5608354738407463, + "grad_norm": 0.5587129592895508, + "learning_rate": 1.2885468875845122e-05, + "loss": 0.110076904296875, + "step": 8297 + }, + { + "epoch": 0.5609030688116804, + "grad_norm": 0.3777584135532379, + "learning_rate": 1.2882202345816528e-05, + "loss": 0.08906936645507812, + "step": 8298 + }, + { + "epoch": 0.5609706637826146, + "grad_norm": 0.49844422936439514, + "learning_rate": 1.287893591826015e-05, + "loss": 0.109130859375, + "step": 8299 + }, + { + "epoch": 0.5610382587535487, + "grad_norm": 0.3716975748538971, + "learning_rate": 1.2875669593334048e-05, + "loss": 0.04523134231567383, + "step": 8300 + }, + { + "epoch": 0.5611058537244829, + "grad_norm": 0.45520898699760437, + "learning_rate": 1.2872403371196255e-05, + "loss": 0.0890350341796875, + "step": 8301 + }, + { + "epoch": 0.5611734486954171, + "grad_norm": 0.3370661437511444, + "learning_rate": 1.2869137252004822e-05, + "loss": 0.0504913330078125, + "step": 8302 + }, + { + "epoch": 0.5612410436663512, + "grad_norm": 0.9270862936973572, + "learning_rate": 1.2865871235917779e-05, + "loss": 0.178558349609375, + "step": 8303 + }, + { + "epoch": 0.5613086386372854, + "grad_norm": 0.36577340960502625, + "learning_rate": 1.2862605323093162e-05, + "loss": 0.078643798828125, + "step": 8304 + }, + { + "epoch": 0.5613762336082195, + "grad_norm": 0.4227314889431, + "learning_rate": 1.2859339513688986e-05, + "loss": 0.09293365478515625, + "step": 8305 + }, + { + "epoch": 0.5614438285791538, + "grad_norm": 0.20316699147224426, + "learning_rate": 1.2856073807863279e-05, + "loss": 0.04280662536621094, + "step": 8306 + }, + { + "epoch": 0.5615114235500879, + "grad_norm": 0.7891659736633301, + "learning_rate": 1.2852808205774053e-05, + "loss": 0.11107254028320312, + "step": 8307 + }, + { + "epoch": 0.561579018521022, + "grad_norm": 0.1798054277896881, + "learning_rate": 1.2849542707579324e-05, + "loss": 0.025348663330078125, + "step": 8308 + }, + { + "epoch": 0.5616466134919562, + "grad_norm": 0.22768534719944, + "learning_rate": 1.2846277313437091e-05, + "loss": 0.041507720947265625, + "step": 8309 + }, + { + "epoch": 0.5617142084628903, + "grad_norm": 0.4923444390296936, + "learning_rate": 1.284301202350535e-05, + "loss": 0.1006927490234375, + "step": 8310 + }, + { + "epoch": 0.5617818034338246, + "grad_norm": 0.6548083424568176, + "learning_rate": 1.283974683794211e-05, + "loss": 0.1084747314453125, + "step": 8311 + }, + { + "epoch": 0.5618493984047587, + "grad_norm": 0.20276078581809998, + "learning_rate": 1.2836481756905345e-05, + "loss": 0.04405975341796875, + "step": 8312 + }, + { + "epoch": 0.5619169933756929, + "grad_norm": 1.1047691106796265, + "learning_rate": 1.2833216780553055e-05, + "loss": 0.214447021484375, + "step": 8313 + }, + { + "epoch": 0.561984588346627, + "grad_norm": 0.4891000986099243, + "learning_rate": 1.2829951909043203e-05, + "loss": 0.080963134765625, + "step": 8314 + }, + { + "epoch": 0.5620521833175611, + "grad_norm": 0.745002269744873, + "learning_rate": 1.2826687142533778e-05, + "loss": 0.09328460693359375, + "step": 8315 + }, + { + "epoch": 0.5621197782884954, + "grad_norm": 0.36882469058036804, + "learning_rate": 1.2823422481182741e-05, + "loss": 0.06471061706542969, + "step": 8316 + }, + { + "epoch": 0.5621873732594295, + "grad_norm": 0.773558497428894, + "learning_rate": 1.2820157925148068e-05, + "loss": 0.158599853515625, + "step": 8317 + }, + { + "epoch": 0.5622549682303637, + "grad_norm": 0.5351260900497437, + "learning_rate": 1.2816893474587704e-05, + "loss": 0.086395263671875, + "step": 8318 + }, + { + "epoch": 0.5623225632012978, + "grad_norm": 0.6792226433753967, + "learning_rate": 1.2813629129659614e-05, + "loss": 0.10422134399414062, + "step": 8319 + }, + { + "epoch": 0.562390158172232, + "grad_norm": 0.817028820514679, + "learning_rate": 1.2810364890521738e-05, + "loss": 0.13267135620117188, + "step": 8320 + }, + { + "epoch": 0.5624577531431662, + "grad_norm": 0.46170908212661743, + "learning_rate": 1.2807100757332033e-05, + "loss": 0.093719482421875, + "step": 8321 + }, + { + "epoch": 0.5625253481141003, + "grad_norm": 0.19510136544704437, + "learning_rate": 1.280383673024843e-05, + "loss": 0.029409408569335938, + "step": 8322 + }, + { + "epoch": 0.5625929430850345, + "grad_norm": 0.7688482999801636, + "learning_rate": 1.280057280942886e-05, + "loss": 0.08905792236328125, + "step": 8323 + }, + { + "epoch": 0.5626605380559686, + "grad_norm": 0.8335273265838623, + "learning_rate": 1.279730899503126e-05, + "loss": 0.1284027099609375, + "step": 8324 + }, + { + "epoch": 0.5627281330269028, + "grad_norm": 0.3772090673446655, + "learning_rate": 1.2794045287213542e-05, + "loss": 0.091796875, + "step": 8325 + }, + { + "epoch": 0.562795727997837, + "grad_norm": 1.298144817352295, + "learning_rate": 1.2790781686133638e-05, + "loss": 0.17413330078125, + "step": 8326 + }, + { + "epoch": 0.5628633229687712, + "grad_norm": 0.7035475969314575, + "learning_rate": 1.2787518191949452e-05, + "loss": 0.11057281494140625, + "step": 8327 + }, + { + "epoch": 0.5629309179397053, + "grad_norm": 0.354085773229599, + "learning_rate": 1.2784254804818898e-05, + "loss": 0.06423187255859375, + "step": 8328 + }, + { + "epoch": 0.5629985129106394, + "grad_norm": 1.2995083332061768, + "learning_rate": 1.2780991524899872e-05, + "loss": 0.241119384765625, + "step": 8329 + }, + { + "epoch": 0.5630661078815736, + "grad_norm": 0.6758328676223755, + "learning_rate": 1.2777728352350283e-05, + "loss": 0.105865478515625, + "step": 8330 + }, + { + "epoch": 0.5631337028525077, + "grad_norm": 0.9986947178840637, + "learning_rate": 1.277446528732801e-05, + "loss": 0.20867919921875, + "step": 8331 + }, + { + "epoch": 0.563201297823442, + "grad_norm": 0.6336528062820435, + "learning_rate": 1.2771202329990947e-05, + "loss": 0.08774852752685547, + "step": 8332 + }, + { + "epoch": 0.5632688927943761, + "grad_norm": 0.2511793076992035, + "learning_rate": 1.2767939480496982e-05, + "loss": 0.047183990478515625, + "step": 8333 + }, + { + "epoch": 0.5633364877653103, + "grad_norm": 0.7380392551422119, + "learning_rate": 1.2764676739003979e-05, + "loss": 0.1236419677734375, + "step": 8334 + }, + { + "epoch": 0.5634040827362444, + "grad_norm": 0.19822369515895844, + "learning_rate": 1.276141410566982e-05, + "loss": 0.03935718536376953, + "step": 8335 + }, + { + "epoch": 0.5634716777071785, + "grad_norm": 0.2399415820837021, + "learning_rate": 1.2758151580652365e-05, + "loss": 0.035488128662109375, + "step": 8336 + }, + { + "epoch": 0.5635392726781128, + "grad_norm": 1.0716371536254883, + "learning_rate": 1.2754889164109485e-05, + "loss": 0.13739585876464844, + "step": 8337 + }, + { + "epoch": 0.5636068676490469, + "grad_norm": 1.0009971857070923, + "learning_rate": 1.2751626856199022e-05, + "loss": 0.179473876953125, + "step": 8338 + }, + { + "epoch": 0.5636744626199811, + "grad_norm": 0.43714335560798645, + "learning_rate": 1.2748364657078837e-05, + "loss": 0.06495952606201172, + "step": 8339 + }, + { + "epoch": 0.5637420575909152, + "grad_norm": 0.7732889652252197, + "learning_rate": 1.2745102566906771e-05, + "loss": 0.12957763671875, + "step": 8340 + }, + { + "epoch": 0.5638096525618495, + "grad_norm": 0.3534374237060547, + "learning_rate": 1.2741840585840673e-05, + "loss": 0.0763397216796875, + "step": 8341 + }, + { + "epoch": 0.5638772475327836, + "grad_norm": 1.032328486442566, + "learning_rate": 1.2738578714038361e-05, + "loss": 0.1646728515625, + "step": 8342 + }, + { + "epoch": 0.5639448425037177, + "grad_norm": 0.24079477787017822, + "learning_rate": 1.2735316951657681e-05, + "loss": 0.037960052490234375, + "step": 8343 + }, + { + "epoch": 0.5640124374746519, + "grad_norm": 0.8195894360542297, + "learning_rate": 1.2732055298856453e-05, + "loss": 0.175811767578125, + "step": 8344 + }, + { + "epoch": 0.564080032445586, + "grad_norm": 0.2717156410217285, + "learning_rate": 1.2728793755792486e-05, + "loss": 0.049560546875, + "step": 8345 + }, + { + "epoch": 0.5641476274165202, + "grad_norm": 0.610342264175415, + "learning_rate": 1.272553232262361e-05, + "loss": 0.1284332275390625, + "step": 8346 + }, + { + "epoch": 0.5642152223874544, + "grad_norm": 0.5662753582000732, + "learning_rate": 1.2722270999507619e-05, + "loss": 0.11002349853515625, + "step": 8347 + }, + { + "epoch": 0.5642828173583886, + "grad_norm": 0.44265326857566833, + "learning_rate": 1.2719009786602323e-05, + "loss": 0.09398078918457031, + "step": 8348 + }, + { + "epoch": 0.5643504123293227, + "grad_norm": 0.25606539845466614, + "learning_rate": 1.2715748684065518e-05, + "loss": 0.0434112548828125, + "step": 8349 + }, + { + "epoch": 0.5644180073002568, + "grad_norm": 0.4344378411769867, + "learning_rate": 1.2712487692055003e-05, + "loss": 0.08281707763671875, + "step": 8350 + }, + { + "epoch": 0.564485602271191, + "grad_norm": 0.6073119044303894, + "learning_rate": 1.270922681072855e-05, + "loss": 0.09380340576171875, + "step": 8351 + }, + { + "epoch": 0.5645531972421252, + "grad_norm": 0.31372711062431335, + "learning_rate": 1.2705966040243955e-05, + "loss": 0.042362213134765625, + "step": 8352 + }, + { + "epoch": 0.5646207922130594, + "grad_norm": 0.28722330927848816, + "learning_rate": 1.2702705380758989e-05, + "loss": 0.059417724609375, + "step": 8353 + }, + { + "epoch": 0.5646883871839935, + "grad_norm": 0.9432055950164795, + "learning_rate": 1.2699444832431427e-05, + "loss": 0.15044403076171875, + "step": 8354 + }, + { + "epoch": 0.5647559821549277, + "grad_norm": 0.521682620048523, + "learning_rate": 1.2696184395419025e-05, + "loss": 0.097442626953125, + "step": 8355 + }, + { + "epoch": 0.5648235771258618, + "grad_norm": 0.17069171369075775, + "learning_rate": 1.2692924069879553e-05, + "loss": 0.03774261474609375, + "step": 8356 + }, + { + "epoch": 0.564891172096796, + "grad_norm": 0.4141019880771637, + "learning_rate": 1.2689663855970765e-05, + "loss": 0.08393096923828125, + "step": 8357 + }, + { + "epoch": 0.5649587670677302, + "grad_norm": 0.6875467300415039, + "learning_rate": 1.2686403753850401e-05, + "loss": 0.14013671875, + "step": 8358 + }, + { + "epoch": 0.5650263620386643, + "grad_norm": 0.5767071843147278, + "learning_rate": 1.2683143763676218e-05, + "loss": 0.1329193115234375, + "step": 8359 + }, + { + "epoch": 0.5650939570095985, + "grad_norm": 1.2520105838775635, + "learning_rate": 1.267988388560594e-05, + "loss": 0.2375946044921875, + "step": 8360 + }, + { + "epoch": 0.5651615519805326, + "grad_norm": 0.48859402537345886, + "learning_rate": 1.2676624119797315e-05, + "loss": 0.1183624267578125, + "step": 8361 + }, + { + "epoch": 0.5652291469514669, + "grad_norm": 0.4852737486362457, + "learning_rate": 1.2673364466408058e-05, + "loss": 0.072509765625, + "step": 8362 + }, + { + "epoch": 0.565296741922401, + "grad_norm": 0.2180386781692505, + "learning_rate": 1.2670104925595905e-05, + "loss": 0.033649444580078125, + "step": 8363 + }, + { + "epoch": 0.5653643368933351, + "grad_norm": 0.15777629613876343, + "learning_rate": 1.266684549751856e-05, + "loss": 0.026760101318359375, + "step": 8364 + }, + { + "epoch": 0.5654319318642693, + "grad_norm": 0.28039246797561646, + "learning_rate": 1.2663586182333739e-05, + "loss": 0.06880950927734375, + "step": 8365 + }, + { + "epoch": 0.5654995268352034, + "grad_norm": 0.3606339395046234, + "learning_rate": 1.2660326980199148e-05, + "loss": 0.0611419677734375, + "step": 8366 + }, + { + "epoch": 0.5655671218061377, + "grad_norm": 0.44247791171073914, + "learning_rate": 1.2657067891272495e-05, + "loss": 0.06331634521484375, + "step": 8367 + }, + { + "epoch": 0.5656347167770718, + "grad_norm": 0.20603272318840027, + "learning_rate": 1.2653808915711462e-05, + "loss": 0.02794647216796875, + "step": 8368 + }, + { + "epoch": 0.565702311748006, + "grad_norm": 0.27191653847694397, + "learning_rate": 1.2650550053673744e-05, + "loss": 0.037937164306640625, + "step": 8369 + }, + { + "epoch": 0.5657699067189401, + "grad_norm": 1.6011784076690674, + "learning_rate": 1.264729130531703e-05, + "loss": 0.223602294921875, + "step": 8370 + }, + { + "epoch": 0.5658375016898742, + "grad_norm": 0.29137131571769714, + "learning_rate": 1.2644032670798988e-05, + "loss": 0.06134033203125, + "step": 8371 + }, + { + "epoch": 0.5659050966608085, + "grad_norm": 1.787097454071045, + "learning_rate": 1.26407741502773e-05, + "loss": 0.2679443359375, + "step": 8372 + }, + { + "epoch": 0.5659726916317426, + "grad_norm": 0.35747793316841125, + "learning_rate": 1.2637515743909628e-05, + "loss": 0.06229114532470703, + "step": 8373 + }, + { + "epoch": 0.5660402866026768, + "grad_norm": 0.7451872825622559, + "learning_rate": 1.2634257451853643e-05, + "loss": 0.1419830322265625, + "step": 8374 + }, + { + "epoch": 0.5661078815736109, + "grad_norm": 0.25346776843070984, + "learning_rate": 1.2630999274266989e-05, + "loss": 0.04032135009765625, + "step": 8375 + }, + { + "epoch": 0.566175476544545, + "grad_norm": 0.3021376430988312, + "learning_rate": 1.2627741211307327e-05, + "loss": 0.055881500244140625, + "step": 8376 + }, + { + "epoch": 0.5662430715154793, + "grad_norm": 0.6411466598510742, + "learning_rate": 1.2624483263132294e-05, + "loss": 0.1070404052734375, + "step": 8377 + }, + { + "epoch": 0.5663106664864134, + "grad_norm": 0.8001646995544434, + "learning_rate": 1.2621225429899535e-05, + "loss": 0.1148681640625, + "step": 8378 + }, + { + "epoch": 0.5663782614573476, + "grad_norm": 0.6627070307731628, + "learning_rate": 1.2617967711766688e-05, + "loss": 0.097900390625, + "step": 8379 + }, + { + "epoch": 0.5664458564282817, + "grad_norm": 0.4932195246219635, + "learning_rate": 1.2614710108891368e-05, + "loss": 0.10614013671875, + "step": 8380 + }, + { + "epoch": 0.5665134513992159, + "grad_norm": 0.538599967956543, + "learning_rate": 1.2611452621431212e-05, + "loss": 0.10097503662109375, + "step": 8381 + }, + { + "epoch": 0.56658104637015, + "grad_norm": 1.174904465675354, + "learning_rate": 1.260819524954383e-05, + "loss": 0.1861572265625, + "step": 8382 + }, + { + "epoch": 0.5666486413410842, + "grad_norm": 0.3597679138183594, + "learning_rate": 1.2604937993386842e-05, + "loss": 0.0541534423828125, + "step": 8383 + }, + { + "epoch": 0.5667162363120184, + "grad_norm": 0.3464151620864868, + "learning_rate": 1.260168085311784e-05, + "loss": 0.0693206787109375, + "step": 8384 + }, + { + "epoch": 0.5667838312829525, + "grad_norm": 0.7907705307006836, + "learning_rate": 1.259842382889444e-05, + "loss": 0.12918853759765625, + "step": 8385 + }, + { + "epoch": 0.5668514262538867, + "grad_norm": 0.5824630856513977, + "learning_rate": 1.2595166920874225e-05, + "loss": 0.08136749267578125, + "step": 8386 + }, + { + "epoch": 0.5669190212248209, + "grad_norm": 0.2801508605480194, + "learning_rate": 1.25919101292148e-05, + "loss": 0.04547119140625, + "step": 8387 + }, + { + "epoch": 0.5669866161957551, + "grad_norm": 0.2745221257209778, + "learning_rate": 1.2588653454073728e-05, + "loss": 0.0605010986328125, + "step": 8388 + }, + { + "epoch": 0.5670542111666892, + "grad_norm": 0.33358222246170044, + "learning_rate": 1.2585396895608602e-05, + "loss": 0.05657196044921875, + "step": 8389 + }, + { + "epoch": 0.5671218061376233, + "grad_norm": 0.24063025414943695, + "learning_rate": 1.2582140453976985e-05, + "loss": 0.034732818603515625, + "step": 8390 + }, + { + "epoch": 0.5671894011085575, + "grad_norm": 1.1352790594100952, + "learning_rate": 1.257888412933646e-05, + "loss": 0.1850128173828125, + "step": 8391 + }, + { + "epoch": 0.5672569960794916, + "grad_norm": 0.8400478363037109, + "learning_rate": 1.2575627921844577e-05, + "loss": 0.1178741455078125, + "step": 8392 + }, + { + "epoch": 0.5673245910504259, + "grad_norm": 0.7331691980361938, + "learning_rate": 1.2572371831658885e-05, + "loss": 0.114593505859375, + "step": 8393 + }, + { + "epoch": 0.56739218602136, + "grad_norm": 0.4676579236984253, + "learning_rate": 1.2569115858936942e-05, + "loss": 0.07450485229492188, + "step": 8394 + }, + { + "epoch": 0.5674597809922942, + "grad_norm": 0.8859687447547913, + "learning_rate": 1.256586000383629e-05, + "loss": 0.1823577880859375, + "step": 8395 + }, + { + "epoch": 0.5675273759632283, + "grad_norm": 0.7822870016098022, + "learning_rate": 1.2562604266514477e-05, + "loss": 0.09673309326171875, + "step": 8396 + }, + { + "epoch": 0.5675949709341624, + "grad_norm": 0.36287811398506165, + "learning_rate": 1.2559348647129018e-05, + "loss": 0.060001373291015625, + "step": 8397 + }, + { + "epoch": 0.5676625659050967, + "grad_norm": 0.5411337614059448, + "learning_rate": 1.2556093145837455e-05, + "loss": 0.0919342041015625, + "step": 8398 + }, + { + "epoch": 0.5677301608760308, + "grad_norm": 0.2561699450016022, + "learning_rate": 1.25528377627973e-05, + "loss": 0.04741668701171875, + "step": 8399 + }, + { + "epoch": 0.567797755846965, + "grad_norm": 0.3732307255268097, + "learning_rate": 1.2549582498166079e-05, + "loss": 0.05634307861328125, + "step": 8400 + }, + { + "epoch": 0.5678653508178991, + "grad_norm": 0.8632581830024719, + "learning_rate": 1.254632735210129e-05, + "loss": 0.13761138916015625, + "step": 8401 + }, + { + "epoch": 0.5679329457888334, + "grad_norm": 0.21155503392219543, + "learning_rate": 1.2543072324760445e-05, + "loss": 0.03020477294921875, + "step": 8402 + }, + { + "epoch": 0.5680005407597675, + "grad_norm": 0.6263591051101685, + "learning_rate": 1.2539817416301045e-05, + "loss": 0.06372642517089844, + "step": 8403 + }, + { + "epoch": 0.5680681357307016, + "grad_norm": 0.3768131732940674, + "learning_rate": 1.253656262688057e-05, + "loss": 0.06036376953125, + "step": 8404 + }, + { + "epoch": 0.5681357307016358, + "grad_norm": 0.4080195128917694, + "learning_rate": 1.2533307956656522e-05, + "loss": 0.05396270751953125, + "step": 8405 + }, + { + "epoch": 0.5682033256725699, + "grad_norm": 0.39640289545059204, + "learning_rate": 1.253005340578637e-05, + "loss": 0.08721542358398438, + "step": 8406 + }, + { + "epoch": 0.5682709206435042, + "grad_norm": 0.4387664198875427, + "learning_rate": 1.2526798974427598e-05, + "loss": 0.09351348876953125, + "step": 8407 + }, + { + "epoch": 0.5683385156144383, + "grad_norm": 0.8547767996788025, + "learning_rate": 1.2523544662737668e-05, + "loss": 0.179443359375, + "step": 8408 + }, + { + "epoch": 0.5684061105853725, + "grad_norm": 0.6028350591659546, + "learning_rate": 1.2520290470874054e-05, + "loss": 0.1018218994140625, + "step": 8409 + }, + { + "epoch": 0.5684737055563066, + "grad_norm": 0.9519541263580322, + "learning_rate": 1.2517036398994203e-05, + "loss": 0.177734375, + "step": 8410 + }, + { + "epoch": 0.5685413005272407, + "grad_norm": 0.6196027994155884, + "learning_rate": 1.2513782447255574e-05, + "loss": 0.08953857421875, + "step": 8411 + }, + { + "epoch": 0.568608895498175, + "grad_norm": 0.3496325612068176, + "learning_rate": 1.2510528615815612e-05, + "loss": 0.053592681884765625, + "step": 8412 + }, + { + "epoch": 0.5686764904691091, + "grad_norm": 0.7240952849388123, + "learning_rate": 1.250727490483176e-05, + "loss": 0.158355712890625, + "step": 8413 + }, + { + "epoch": 0.5687440854400433, + "grad_norm": 0.32968074083328247, + "learning_rate": 1.250402131446145e-05, + "loss": 0.03954315185546875, + "step": 8414 + }, + { + "epoch": 0.5688116804109774, + "grad_norm": 0.7117916345596313, + "learning_rate": 1.2500767844862106e-05, + "loss": 0.15509033203125, + "step": 8415 + }, + { + "epoch": 0.5688792753819116, + "grad_norm": 0.2959235608577728, + "learning_rate": 1.2497514496191164e-05, + "loss": 0.0637054443359375, + "step": 8416 + }, + { + "epoch": 0.5689468703528457, + "grad_norm": 0.7533898949623108, + "learning_rate": 1.2494261268606026e-05, + "loss": 0.171234130859375, + "step": 8417 + }, + { + "epoch": 0.5690144653237799, + "grad_norm": 1.0488613843917847, + "learning_rate": 1.2491008162264114e-05, + "loss": 0.12473678588867188, + "step": 8418 + }, + { + "epoch": 0.5690820602947141, + "grad_norm": 0.2866862416267395, + "learning_rate": 1.2487755177322828e-05, + "loss": 0.050586700439453125, + "step": 8419 + }, + { + "epoch": 0.5691496552656482, + "grad_norm": 0.30019402503967285, + "learning_rate": 1.2484502313939576e-05, + "loss": 0.037769317626953125, + "step": 8420 + }, + { + "epoch": 0.5692172502365824, + "grad_norm": 0.8775963187217712, + "learning_rate": 1.2481249572271738e-05, + "loss": 0.137359619140625, + "step": 8421 + }, + { + "epoch": 0.5692848452075165, + "grad_norm": 0.2942218780517578, + "learning_rate": 1.2477996952476718e-05, + "loss": 0.04802703857421875, + "step": 8422 + }, + { + "epoch": 0.5693524401784508, + "grad_norm": 0.8056875467300415, + "learning_rate": 1.2474744454711882e-05, + "loss": 0.1505126953125, + "step": 8423 + }, + { + "epoch": 0.5694200351493849, + "grad_norm": 0.43183469772338867, + "learning_rate": 1.2471492079134618e-05, + "loss": 0.0843505859375, + "step": 8424 + }, + { + "epoch": 0.569487630120319, + "grad_norm": 0.33703991770744324, + "learning_rate": 1.2468239825902286e-05, + "loss": 0.0923919677734375, + "step": 8425 + }, + { + "epoch": 0.5695552250912532, + "grad_norm": 0.37829262018203735, + "learning_rate": 1.2464987695172267e-05, + "loss": 0.03771209716796875, + "step": 8426 + }, + { + "epoch": 0.5696228200621873, + "grad_norm": 0.6819974780082703, + "learning_rate": 1.2461735687101902e-05, + "loss": 0.165740966796875, + "step": 8427 + }, + { + "epoch": 0.5696904150331216, + "grad_norm": 0.9778000116348267, + "learning_rate": 1.2458483801848547e-05, + "loss": 0.12658309936523438, + "step": 8428 + }, + { + "epoch": 0.5697580100040557, + "grad_norm": 0.8298202753067017, + "learning_rate": 1.2455232039569557e-05, + "loss": 0.1213531494140625, + "step": 8429 + }, + { + "epoch": 0.5698256049749899, + "grad_norm": 0.25992265343666077, + "learning_rate": 1.2451980400422262e-05, + "loss": 0.040767669677734375, + "step": 8430 + }, + { + "epoch": 0.569893199945924, + "grad_norm": 0.651820719242096, + "learning_rate": 1.2448728884564003e-05, + "loss": 0.134185791015625, + "step": 8431 + }, + { + "epoch": 0.5699607949168581, + "grad_norm": 1.084137201309204, + "learning_rate": 1.2445477492152104e-05, + "loss": 0.1604766845703125, + "step": 8432 + }, + { + "epoch": 0.5700283898877924, + "grad_norm": 0.7774629592895508, + "learning_rate": 1.2442226223343896e-05, + "loss": 0.11294746398925781, + "step": 8433 + }, + { + "epoch": 0.5700959848587265, + "grad_norm": 1.1483633518218994, + "learning_rate": 1.2438975078296685e-05, + "loss": 0.20166015625, + "step": 8434 + }, + { + "epoch": 0.5701635798296607, + "grad_norm": 1.5142112970352173, + "learning_rate": 1.243572405716779e-05, + "loss": 0.17462158203125, + "step": 8435 + }, + { + "epoch": 0.5702311748005948, + "grad_norm": 0.8032716512680054, + "learning_rate": 1.2432473160114507e-05, + "loss": 0.13783645629882812, + "step": 8436 + }, + { + "epoch": 0.570298769771529, + "grad_norm": 1.1894114017486572, + "learning_rate": 1.2429222387294146e-05, + "loss": 0.2073822021484375, + "step": 8437 + }, + { + "epoch": 0.5703663647424632, + "grad_norm": 0.708615779876709, + "learning_rate": 1.2425971738863996e-05, + "loss": 0.1332244873046875, + "step": 8438 + }, + { + "epoch": 0.5704339597133973, + "grad_norm": 0.4910333454608917, + "learning_rate": 1.2422721214981334e-05, + "loss": 0.0833892822265625, + "step": 8439 + }, + { + "epoch": 0.5705015546843315, + "grad_norm": 0.2606343924999237, + "learning_rate": 1.2419470815803449e-05, + "loss": 0.047367095947265625, + "step": 8440 + }, + { + "epoch": 0.5705691496552656, + "grad_norm": 0.617929995059967, + "learning_rate": 1.2416220541487612e-05, + "loss": 0.140869140625, + "step": 8441 + }, + { + "epoch": 0.5706367446261998, + "grad_norm": 0.8298476338386536, + "learning_rate": 1.2412970392191099e-05, + "loss": 0.158111572265625, + "step": 8442 + }, + { + "epoch": 0.570704339597134, + "grad_norm": 0.409871906042099, + "learning_rate": 1.2409720368071161e-05, + "loss": 0.06533050537109375, + "step": 8443 + }, + { + "epoch": 0.5707719345680682, + "grad_norm": 1.0522006750106812, + "learning_rate": 1.2406470469285064e-05, + "loss": 0.17769241333007812, + "step": 8444 + }, + { + "epoch": 0.5708395295390023, + "grad_norm": 0.493179589509964, + "learning_rate": 1.2403220695990052e-05, + "loss": 0.074249267578125, + "step": 8445 + }, + { + "epoch": 0.5709071245099364, + "grad_norm": 0.6500289440155029, + "learning_rate": 1.2399971048343378e-05, + "loss": 0.124481201171875, + "step": 8446 + }, + { + "epoch": 0.5709747194808706, + "grad_norm": 0.3466317057609558, + "learning_rate": 1.2396721526502268e-05, + "loss": 0.07430267333984375, + "step": 8447 + }, + { + "epoch": 0.5710423144518048, + "grad_norm": 0.3782847225666046, + "learning_rate": 1.2393472130623962e-05, + "loss": 0.09258270263671875, + "step": 8448 + }, + { + "epoch": 0.571109909422739, + "grad_norm": 0.9490846395492554, + "learning_rate": 1.2390222860865689e-05, + "loss": 0.13422393798828125, + "step": 8449 + }, + { + "epoch": 0.5711775043936731, + "grad_norm": 0.3426610231399536, + "learning_rate": 1.2386973717384656e-05, + "loss": 0.0579071044921875, + "step": 8450 + }, + { + "epoch": 0.5712450993646073, + "grad_norm": 1.2195324897766113, + "learning_rate": 1.2383724700338091e-05, + "loss": 0.1682281494140625, + "step": 8451 + }, + { + "epoch": 0.5713126943355414, + "grad_norm": 1.0388990640640259, + "learning_rate": 1.2380475809883191e-05, + "loss": 0.16973876953125, + "step": 8452 + }, + { + "epoch": 0.5713802893064756, + "grad_norm": 0.35031285881996155, + "learning_rate": 1.2377227046177163e-05, + "loss": 0.06711578369140625, + "step": 8453 + }, + { + "epoch": 0.5714478842774098, + "grad_norm": 0.8513791561126709, + "learning_rate": 1.23739784093772e-05, + "loss": 0.152130126953125, + "step": 8454 + }, + { + "epoch": 0.5715154792483439, + "grad_norm": 0.9198154807090759, + "learning_rate": 1.2370729899640498e-05, + "loss": 0.20135498046875, + "step": 8455 + }, + { + "epoch": 0.5715830742192781, + "grad_norm": 0.44459933042526245, + "learning_rate": 1.2367481517124229e-05, + "loss": 0.07025146484375, + "step": 8456 + }, + { + "epoch": 0.5716506691902122, + "grad_norm": 0.4543910026550293, + "learning_rate": 1.2364233261985577e-05, + "loss": 0.1075439453125, + "step": 8457 + }, + { + "epoch": 0.5717182641611465, + "grad_norm": 0.4281620681285858, + "learning_rate": 1.236098513438171e-05, + "loss": 0.051265716552734375, + "step": 8458 + }, + { + "epoch": 0.5717858591320806, + "grad_norm": 0.39645546674728394, + "learning_rate": 1.2357737134469802e-05, + "loss": 0.0839080810546875, + "step": 8459 + }, + { + "epoch": 0.5718534541030147, + "grad_norm": 0.42371076345443726, + "learning_rate": 1.2354489262406994e-05, + "loss": 0.098388671875, + "step": 8460 + }, + { + "epoch": 0.5719210490739489, + "grad_norm": 0.20967712998390198, + "learning_rate": 1.2351241518350454e-05, + "loss": 0.03452301025390625, + "step": 8461 + }, + { + "epoch": 0.571988644044883, + "grad_norm": 1.298867106437683, + "learning_rate": 1.2347993902457322e-05, + "loss": 0.1874542236328125, + "step": 8462 + }, + { + "epoch": 0.5720562390158173, + "grad_norm": 0.22758221626281738, + "learning_rate": 1.2344746414884733e-05, + "loss": 0.0366973876953125, + "step": 8463 + }, + { + "epoch": 0.5721238339867514, + "grad_norm": 0.4256134331226349, + "learning_rate": 1.2341499055789826e-05, + "loss": 0.0755615234375, + "step": 8464 + }, + { + "epoch": 0.5721914289576856, + "grad_norm": 0.3240297734737396, + "learning_rate": 1.2338251825329728e-05, + "loss": 0.06452178955078125, + "step": 8465 + }, + { + "epoch": 0.5722590239286197, + "grad_norm": 1.1226601600646973, + "learning_rate": 1.2335004723661564e-05, + "loss": 0.1419830322265625, + "step": 8466 + }, + { + "epoch": 0.5723266188995538, + "grad_norm": 0.2197822630405426, + "learning_rate": 1.233175775094244e-05, + "loss": 0.0338897705078125, + "step": 8467 + }, + { + "epoch": 0.5723942138704881, + "grad_norm": 0.6552119255065918, + "learning_rate": 1.2328510907329475e-05, + "loss": 0.10117721557617188, + "step": 8468 + }, + { + "epoch": 0.5724618088414222, + "grad_norm": 0.28153085708618164, + "learning_rate": 1.232526419297976e-05, + "loss": 0.047271728515625, + "step": 8469 + }, + { + "epoch": 0.5725294038123564, + "grad_norm": 0.27664902806282043, + "learning_rate": 1.23220176080504e-05, + "loss": 0.0610504150390625, + "step": 8470 + }, + { + "epoch": 0.5725969987832905, + "grad_norm": 0.5178539156913757, + "learning_rate": 1.231877115269848e-05, + "loss": 0.1051483154296875, + "step": 8471 + }, + { + "epoch": 0.5726645937542247, + "grad_norm": 0.7502002716064453, + "learning_rate": 1.2315524827081093e-05, + "loss": 0.12342071533203125, + "step": 8472 + }, + { + "epoch": 0.5727321887251589, + "grad_norm": 0.28867945075035095, + "learning_rate": 1.2312278631355305e-05, + "loss": 0.04822540283203125, + "step": 8473 + }, + { + "epoch": 0.572799783696093, + "grad_norm": 0.5581231117248535, + "learning_rate": 1.2309032565678188e-05, + "loss": 0.08599853515625, + "step": 8474 + }, + { + "epoch": 0.5728673786670272, + "grad_norm": 1.0820914506912231, + "learning_rate": 1.230578663020682e-05, + "loss": 0.1791229248046875, + "step": 8475 + }, + { + "epoch": 0.5729349736379613, + "grad_norm": 0.5897020101547241, + "learning_rate": 1.230254082509824e-05, + "loss": 0.12567138671875, + "step": 8476 + }, + { + "epoch": 0.5730025686088955, + "grad_norm": 0.8411328792572021, + "learning_rate": 1.2299295150509514e-05, + "loss": 0.133697509765625, + "step": 8477 + }, + { + "epoch": 0.5730701635798297, + "grad_norm": 1.2914390563964844, + "learning_rate": 1.2296049606597681e-05, + "loss": 0.15593719482421875, + "step": 8478 + }, + { + "epoch": 0.5731377585507639, + "grad_norm": 0.8654220104217529, + "learning_rate": 1.229280419351979e-05, + "loss": 0.145843505859375, + "step": 8479 + }, + { + "epoch": 0.573205353521698, + "grad_norm": 0.5763646960258484, + "learning_rate": 1.2289558911432862e-05, + "loss": 0.08826446533203125, + "step": 8480 + }, + { + "epoch": 0.5732729484926321, + "grad_norm": 1.093503713607788, + "learning_rate": 1.2286313760493934e-05, + "loss": 0.250030517578125, + "step": 8481 + }, + { + "epoch": 0.5733405434635663, + "grad_norm": 0.6218481659889221, + "learning_rate": 1.2283068740860017e-05, + "loss": 0.15130615234375, + "step": 8482 + }, + { + "epoch": 0.5734081384345004, + "grad_norm": 0.3166939318180084, + "learning_rate": 1.2279823852688141e-05, + "loss": 0.043239593505859375, + "step": 8483 + }, + { + "epoch": 0.5734757334054347, + "grad_norm": 0.3310858905315399, + "learning_rate": 1.22765790961353e-05, + "loss": 0.06644439697265625, + "step": 8484 + }, + { + "epoch": 0.5735433283763688, + "grad_norm": 0.4280115067958832, + "learning_rate": 1.2273334471358493e-05, + "loss": 0.08303070068359375, + "step": 8485 + }, + { + "epoch": 0.573610923347303, + "grad_norm": 0.6546331644058228, + "learning_rate": 1.2270089978514727e-05, + "loss": 0.12762451171875, + "step": 8486 + }, + { + "epoch": 0.5736785183182371, + "grad_norm": 0.3490179777145386, + "learning_rate": 1.2266845617760978e-05, + "loss": 0.0374298095703125, + "step": 8487 + }, + { + "epoch": 0.5737461132891712, + "grad_norm": 0.4395459294319153, + "learning_rate": 1.2263601389254247e-05, + "loss": 0.10884857177734375, + "step": 8488 + }, + { + "epoch": 0.5738137082601055, + "grad_norm": 0.8459939956665039, + "learning_rate": 1.2260357293151487e-05, + "loss": 0.173492431640625, + "step": 8489 + }, + { + "epoch": 0.5738813032310396, + "grad_norm": 1.2709506750106812, + "learning_rate": 1.2257113329609684e-05, + "loss": 0.19970703125, + "step": 8490 + }, + { + "epoch": 0.5739488982019738, + "grad_norm": 1.4361350536346436, + "learning_rate": 1.2253869498785791e-05, + "loss": 0.1959991455078125, + "step": 8491 + }, + { + "epoch": 0.5740164931729079, + "grad_norm": 0.3680454194545746, + "learning_rate": 1.225062580083678e-05, + "loss": 0.0729217529296875, + "step": 8492 + }, + { + "epoch": 0.5740840881438422, + "grad_norm": 0.37655916810035706, + "learning_rate": 1.2247382235919582e-05, + "loss": 0.0790863037109375, + "step": 8493 + }, + { + "epoch": 0.5741516831147763, + "grad_norm": 0.4940882921218872, + "learning_rate": 1.2244138804191152e-05, + "loss": 0.1031036376953125, + "step": 8494 + }, + { + "epoch": 0.5742192780857104, + "grad_norm": 0.4051227569580078, + "learning_rate": 1.2240895505808423e-05, + "loss": 0.06772613525390625, + "step": 8495 + }, + { + "epoch": 0.5742868730566446, + "grad_norm": 0.5546890497207642, + "learning_rate": 1.2237652340928334e-05, + "loss": 0.1248931884765625, + "step": 8496 + }, + { + "epoch": 0.5743544680275787, + "grad_norm": 0.25389382243156433, + "learning_rate": 1.2234409309707806e-05, + "loss": 0.049991607666015625, + "step": 8497 + }, + { + "epoch": 0.574422062998513, + "grad_norm": 0.19214580953121185, + "learning_rate": 1.2231166412303746e-05, + "loss": 0.03862762451171875, + "step": 8498 + }, + { + "epoch": 0.5744896579694471, + "grad_norm": 0.3447042405605316, + "learning_rate": 1.2227923648873077e-05, + "loss": 0.06620025634765625, + "step": 8499 + }, + { + "epoch": 0.5745572529403813, + "grad_norm": 1.0316461324691772, + "learning_rate": 1.2224681019572698e-05, + "loss": 0.1351470947265625, + "step": 8500 + }, + { + "epoch": 0.5746248479113154, + "grad_norm": 0.40448707342147827, + "learning_rate": 1.2221438524559519e-05, + "loss": 0.0857086181640625, + "step": 8501 + }, + { + "epoch": 0.5746924428822495, + "grad_norm": 0.5313166379928589, + "learning_rate": 1.2218196163990415e-05, + "loss": 0.081085205078125, + "step": 8502 + }, + { + "epoch": 0.5747600378531837, + "grad_norm": 0.5134736895561218, + "learning_rate": 1.2214953938022286e-05, + "loss": 0.0920257568359375, + "step": 8503 + }, + { + "epoch": 0.5748276328241179, + "grad_norm": 0.7525824904441833, + "learning_rate": 1.2211711846812e-05, + "loss": 0.12485504150390625, + "step": 8504 + }, + { + "epoch": 0.5748952277950521, + "grad_norm": 1.2576247453689575, + "learning_rate": 1.2208469890516444e-05, + "loss": 0.143890380859375, + "step": 8505 + }, + { + "epoch": 0.5749628227659862, + "grad_norm": 0.6033251285552979, + "learning_rate": 1.2205228069292467e-05, + "loss": 0.083831787109375, + "step": 8506 + }, + { + "epoch": 0.5750304177369203, + "grad_norm": 0.46613290905952454, + "learning_rate": 1.220198638329694e-05, + "loss": 0.07656097412109375, + "step": 8507 + }, + { + "epoch": 0.5750980127078545, + "grad_norm": 0.6304692625999451, + "learning_rate": 1.2198744832686717e-05, + "loss": 0.1209869384765625, + "step": 8508 + }, + { + "epoch": 0.5751656076787887, + "grad_norm": 0.40023118257522583, + "learning_rate": 1.2195503417618631e-05, + "loss": 0.069854736328125, + "step": 8509 + }, + { + "epoch": 0.5752332026497229, + "grad_norm": 0.25139322876930237, + "learning_rate": 1.2192262138249536e-05, + "loss": 0.05371856689453125, + "step": 8510 + }, + { + "epoch": 0.575300797620657, + "grad_norm": 0.9901968240737915, + "learning_rate": 1.2189020994736255e-05, + "loss": 0.161224365234375, + "step": 8511 + }, + { + "epoch": 0.5753683925915912, + "grad_norm": 0.3838174343109131, + "learning_rate": 1.2185779987235627e-05, + "loss": 0.07828521728515625, + "step": 8512 + }, + { + "epoch": 0.5754359875625253, + "grad_norm": 1.9874342679977417, + "learning_rate": 1.2182539115904459e-05, + "loss": 0.17684173583984375, + "step": 8513 + }, + { + "epoch": 0.5755035825334595, + "grad_norm": 0.41978445649147034, + "learning_rate": 1.2179298380899574e-05, + "loss": 0.06913375854492188, + "step": 8514 + }, + { + "epoch": 0.5755711775043937, + "grad_norm": 0.3557153344154358, + "learning_rate": 1.2176057782377771e-05, + "loss": 0.054576873779296875, + "step": 8515 + }, + { + "epoch": 0.5756387724753278, + "grad_norm": 0.9222202897071838, + "learning_rate": 1.2172817320495857e-05, + "loss": 0.15355682373046875, + "step": 8516 + }, + { + "epoch": 0.575706367446262, + "grad_norm": 0.35663124918937683, + "learning_rate": 1.216957699541062e-05, + "loss": 0.06903839111328125, + "step": 8517 + }, + { + "epoch": 0.5757739624171961, + "grad_norm": 0.32501015067100525, + "learning_rate": 1.2166336807278856e-05, + "loss": 0.07927703857421875, + "step": 8518 + }, + { + "epoch": 0.5758415573881304, + "grad_norm": 0.3278743326663971, + "learning_rate": 1.2163096756257336e-05, + "loss": 0.059879302978515625, + "step": 8519 + }, + { + "epoch": 0.5759091523590645, + "grad_norm": 0.578869104385376, + "learning_rate": 1.2159856842502835e-05, + "loss": 0.11855888366699219, + "step": 8520 + }, + { + "epoch": 0.5759767473299986, + "grad_norm": 0.35479551553726196, + "learning_rate": 1.2156617066172128e-05, + "loss": 0.050991058349609375, + "step": 8521 + }, + { + "epoch": 0.5760443423009328, + "grad_norm": 0.765856146812439, + "learning_rate": 1.2153377427421965e-05, + "loss": 0.11743927001953125, + "step": 8522 + }, + { + "epoch": 0.5761119372718669, + "grad_norm": 0.732251763343811, + "learning_rate": 1.2150137926409108e-05, + "loss": 0.14539718627929688, + "step": 8523 + }, + { + "epoch": 0.5761795322428012, + "grad_norm": 0.22258621454238892, + "learning_rate": 1.2146898563290298e-05, + "loss": 0.04213714599609375, + "step": 8524 + }, + { + "epoch": 0.5762471272137353, + "grad_norm": 0.7064968943595886, + "learning_rate": 1.2143659338222285e-05, + "loss": 0.185882568359375, + "step": 8525 + }, + { + "epoch": 0.5763147221846695, + "grad_norm": 0.255118727684021, + "learning_rate": 1.214042025136179e-05, + "loss": 0.0393829345703125, + "step": 8526 + }, + { + "epoch": 0.5763823171556036, + "grad_norm": 0.7827516198158264, + "learning_rate": 1.213718130286555e-05, + "loss": 0.105621337890625, + "step": 8527 + }, + { + "epoch": 0.5764499121265377, + "grad_norm": 0.5834528803825378, + "learning_rate": 1.213394249289028e-05, + "loss": 0.08419036865234375, + "step": 8528 + }, + { + "epoch": 0.576517507097472, + "grad_norm": 0.31221604347229004, + "learning_rate": 1.2130703821592701e-05, + "loss": 0.0707855224609375, + "step": 8529 + }, + { + "epoch": 0.5765851020684061, + "grad_norm": 0.3864678740501404, + "learning_rate": 1.2127465289129509e-05, + "loss": 0.08350372314453125, + "step": 8530 + }, + { + "epoch": 0.5766526970393403, + "grad_norm": 0.7302019000053406, + "learning_rate": 1.2124226895657418e-05, + "loss": 0.194854736328125, + "step": 8531 + }, + { + "epoch": 0.5767202920102744, + "grad_norm": 0.6557503342628479, + "learning_rate": 1.212098864133311e-05, + "loss": 0.12164306640625, + "step": 8532 + }, + { + "epoch": 0.5767878869812086, + "grad_norm": 0.30485376715660095, + "learning_rate": 1.2117750526313272e-05, + "loss": 0.062896728515625, + "step": 8533 + }, + { + "epoch": 0.5768554819521428, + "grad_norm": 0.26742982864379883, + "learning_rate": 1.2114512550754597e-05, + "loss": 0.04431915283203125, + "step": 8534 + }, + { + "epoch": 0.5769230769230769, + "grad_norm": 0.5721642971038818, + "learning_rate": 1.2111274714813741e-05, + "loss": 0.11548614501953125, + "step": 8535 + }, + { + "epoch": 0.5769906718940111, + "grad_norm": 1.1830639839172363, + "learning_rate": 1.2108037018647382e-05, + "loss": 0.2162017822265625, + "step": 8536 + }, + { + "epoch": 0.5770582668649452, + "grad_norm": 0.5518195629119873, + "learning_rate": 1.2104799462412174e-05, + "loss": 0.1108245849609375, + "step": 8537 + }, + { + "epoch": 0.5771258618358794, + "grad_norm": 0.835178554058075, + "learning_rate": 1.210156204626478e-05, + "loss": 0.21136474609375, + "step": 8538 + }, + { + "epoch": 0.5771934568068136, + "grad_norm": 0.8552346229553223, + "learning_rate": 1.2098324770361834e-05, + "loss": 0.17474365234375, + "step": 8539 + }, + { + "epoch": 0.5772610517777478, + "grad_norm": 0.5774468183517456, + "learning_rate": 1.2095087634859983e-05, + "loss": 0.09929656982421875, + "step": 8540 + }, + { + "epoch": 0.5773286467486819, + "grad_norm": 0.658208966255188, + "learning_rate": 1.2091850639915853e-05, + "loss": 0.11801910400390625, + "step": 8541 + }, + { + "epoch": 0.577396241719616, + "grad_norm": 0.59736168384552, + "learning_rate": 1.2088613785686083e-05, + "loss": 0.130157470703125, + "step": 8542 + }, + { + "epoch": 0.5774638366905502, + "grad_norm": 0.27420151233673096, + "learning_rate": 1.2085377072327283e-05, + "loss": 0.05077362060546875, + "step": 8543 + }, + { + "epoch": 0.5775314316614844, + "grad_norm": 0.2497580647468567, + "learning_rate": 1.208214049999606e-05, + "loss": 0.03781890869140625, + "step": 8544 + }, + { + "epoch": 0.5775990266324186, + "grad_norm": 0.6437122821807861, + "learning_rate": 1.2078904068849028e-05, + "loss": 0.1122283935546875, + "step": 8545 + }, + { + "epoch": 0.5776666216033527, + "grad_norm": 0.4671596586704254, + "learning_rate": 1.2075667779042781e-05, + "loss": 0.06734848022460938, + "step": 8546 + }, + { + "epoch": 0.5777342165742869, + "grad_norm": 0.3636409342288971, + "learning_rate": 1.2072431630733919e-05, + "loss": 0.060638427734375, + "step": 8547 + }, + { + "epoch": 0.577801811545221, + "grad_norm": 1.46625816822052, + "learning_rate": 1.2069195624079016e-05, + "loss": 0.18610382080078125, + "step": 8548 + }, + { + "epoch": 0.5778694065161551, + "grad_norm": 0.26816970109939575, + "learning_rate": 1.2065959759234657e-05, + "loss": 0.048370361328125, + "step": 8549 + }, + { + "epoch": 0.5779370014870894, + "grad_norm": 0.29010576009750366, + "learning_rate": 1.2062724036357406e-05, + "loss": 0.05062103271484375, + "step": 8550 + }, + { + "epoch": 0.5780045964580235, + "grad_norm": 1.221967101097107, + "learning_rate": 1.2059488455603844e-05, + "loss": 0.2234039306640625, + "step": 8551 + }, + { + "epoch": 0.5780721914289577, + "grad_norm": 0.5243070125579834, + "learning_rate": 1.205625301713051e-05, + "loss": 0.10466766357421875, + "step": 8552 + }, + { + "epoch": 0.5781397863998918, + "grad_norm": 0.675910234451294, + "learning_rate": 1.205301772109396e-05, + "loss": 0.1197967529296875, + "step": 8553 + }, + { + "epoch": 0.5782073813708261, + "grad_norm": 0.8998014330863953, + "learning_rate": 1.2049782567650748e-05, + "loss": 0.215789794921875, + "step": 8554 + }, + { + "epoch": 0.5782749763417602, + "grad_norm": 0.24475537240505219, + "learning_rate": 1.2046547556957394e-05, + "loss": 0.04638671875, + "step": 8555 + }, + { + "epoch": 0.5783425713126943, + "grad_norm": 0.28706276416778564, + "learning_rate": 1.2043312689170443e-05, + "loss": 0.04259967803955078, + "step": 8556 + }, + { + "epoch": 0.5784101662836285, + "grad_norm": 0.22334176301956177, + "learning_rate": 1.2040077964446405e-05, + "loss": 0.04744720458984375, + "step": 8557 + }, + { + "epoch": 0.5784777612545626, + "grad_norm": 0.9908753633499146, + "learning_rate": 1.203684338294181e-05, + "loss": 0.166107177734375, + "step": 8558 + }, + { + "epoch": 0.5785453562254969, + "grad_norm": 0.3436937630176544, + "learning_rate": 1.2033608944813155e-05, + "loss": 0.0595245361328125, + "step": 8559 + }, + { + "epoch": 0.578612951196431, + "grad_norm": 0.26156148314476013, + "learning_rate": 1.203037465021695e-05, + "loss": 0.038482666015625, + "step": 8560 + }, + { + "epoch": 0.5786805461673652, + "grad_norm": 0.7372247576713562, + "learning_rate": 1.2027140499309685e-05, + "loss": 0.13507080078125, + "step": 8561 + }, + { + "epoch": 0.5787481411382993, + "grad_norm": 1.300343632698059, + "learning_rate": 1.202390649224785e-05, + "loss": 0.18634033203125, + "step": 8562 + }, + { + "epoch": 0.5788157361092334, + "grad_norm": 0.6869251132011414, + "learning_rate": 1.2020672629187927e-05, + "loss": 0.15576171875, + "step": 8563 + }, + { + "epoch": 0.5788833310801677, + "grad_norm": 0.7293714880943298, + "learning_rate": 1.2017438910286395e-05, + "loss": 0.114349365234375, + "step": 8564 + }, + { + "epoch": 0.5789509260511018, + "grad_norm": 0.9042299389839172, + "learning_rate": 1.2014205335699709e-05, + "loss": 0.144683837890625, + "step": 8565 + }, + { + "epoch": 0.579018521022036, + "grad_norm": 0.22350046038627625, + "learning_rate": 1.2010971905584343e-05, + "loss": 0.03792572021484375, + "step": 8566 + }, + { + "epoch": 0.5790861159929701, + "grad_norm": 0.3506701588630676, + "learning_rate": 1.2007738620096748e-05, + "loss": 0.035305023193359375, + "step": 8567 + }, + { + "epoch": 0.5791537109639043, + "grad_norm": 0.3710325360298157, + "learning_rate": 1.2004505479393359e-05, + "loss": 0.06694793701171875, + "step": 8568 + }, + { + "epoch": 0.5792213059348384, + "grad_norm": 0.6430571675300598, + "learning_rate": 1.200127248363063e-05, + "loss": 0.11820220947265625, + "step": 8569 + }, + { + "epoch": 0.5792889009057726, + "grad_norm": 0.3726018965244293, + "learning_rate": 1.1998039632964981e-05, + "loss": 0.03128623962402344, + "step": 8570 + }, + { + "epoch": 0.5793564958767068, + "grad_norm": 1.8220399618148804, + "learning_rate": 1.1994806927552852e-05, + "loss": 0.2730712890625, + "step": 8571 + }, + { + "epoch": 0.5794240908476409, + "grad_norm": 0.7473261952400208, + "learning_rate": 1.1991574367550645e-05, + "loss": 0.1522216796875, + "step": 8572 + }, + { + "epoch": 0.5794916858185751, + "grad_norm": 1.3782806396484375, + "learning_rate": 1.1988341953114783e-05, + "loss": 0.172454833984375, + "step": 8573 + }, + { + "epoch": 0.5795592807895092, + "grad_norm": 0.3943878412246704, + "learning_rate": 1.1985109684401662e-05, + "loss": 0.069671630859375, + "step": 8574 + }, + { + "epoch": 0.5796268757604435, + "grad_norm": 0.33644095063209534, + "learning_rate": 1.1981877561567692e-05, + "loss": 0.081573486328125, + "step": 8575 + }, + { + "epoch": 0.5796944707313776, + "grad_norm": 0.33980265259742737, + "learning_rate": 1.1978645584769248e-05, + "loss": 0.0684051513671875, + "step": 8576 + }, + { + "epoch": 0.5797620657023117, + "grad_norm": 0.32227209210395813, + "learning_rate": 1.1975413754162728e-05, + "loss": 0.06835174560546875, + "step": 8577 + }, + { + "epoch": 0.5798296606732459, + "grad_norm": 0.6981717348098755, + "learning_rate": 1.1972182069904495e-05, + "loss": 0.13654327392578125, + "step": 8578 + }, + { + "epoch": 0.57989725564418, + "grad_norm": 0.20266373455524445, + "learning_rate": 1.196895053215092e-05, + "loss": 0.048534393310546875, + "step": 8579 + }, + { + "epoch": 0.5799648506151143, + "grad_norm": 0.3578462600708008, + "learning_rate": 1.1965719141058377e-05, + "loss": 0.076751708984375, + "step": 8580 + }, + { + "epoch": 0.5800324455860484, + "grad_norm": 0.6317638158798218, + "learning_rate": 1.1962487896783204e-05, + "loss": 0.136932373046875, + "step": 8581 + }, + { + "epoch": 0.5801000405569826, + "grad_norm": 0.9426562190055847, + "learning_rate": 1.1959256799481762e-05, + "loss": 0.13686370849609375, + "step": 8582 + }, + { + "epoch": 0.5801676355279167, + "grad_norm": 0.8702630996704102, + "learning_rate": 1.195602584931038e-05, + "loss": 0.1521453857421875, + "step": 8583 + }, + { + "epoch": 0.5802352304988508, + "grad_norm": 0.26607945561408997, + "learning_rate": 1.1952795046425405e-05, + "loss": 0.055633544921875, + "step": 8584 + }, + { + "epoch": 0.5803028254697851, + "grad_norm": 0.756016731262207, + "learning_rate": 1.1949564390983149e-05, + "loss": 0.14117431640625, + "step": 8585 + }, + { + "epoch": 0.5803704204407192, + "grad_norm": 0.5746910572052002, + "learning_rate": 1.1946333883139942e-05, + "loss": 0.13970947265625, + "step": 8586 + }, + { + "epoch": 0.5804380154116534, + "grad_norm": 0.42159897089004517, + "learning_rate": 1.194310352305209e-05, + "loss": 0.077178955078125, + "step": 8587 + }, + { + "epoch": 0.5805056103825875, + "grad_norm": 0.3533647358417511, + "learning_rate": 1.1939873310875906e-05, + "loss": 0.0837249755859375, + "step": 8588 + }, + { + "epoch": 0.5805732053535217, + "grad_norm": 0.7482424378395081, + "learning_rate": 1.193664324676768e-05, + "loss": 0.17364501953125, + "step": 8589 + }, + { + "epoch": 0.5806408003244559, + "grad_norm": 0.5928764343261719, + "learning_rate": 1.19334133308837e-05, + "loss": 0.08733367919921875, + "step": 8590 + }, + { + "epoch": 0.58070839529539, + "grad_norm": 0.4751546084880829, + "learning_rate": 1.1930183563380258e-05, + "loss": 0.0978546142578125, + "step": 8591 + }, + { + "epoch": 0.5807759902663242, + "grad_norm": 0.3277466297149658, + "learning_rate": 1.192695394441362e-05, + "loss": 0.04296112060546875, + "step": 8592 + }, + { + "epoch": 0.5808435852372583, + "grad_norm": 0.3410690724849701, + "learning_rate": 1.1923724474140072e-05, + "loss": 0.05519866943359375, + "step": 8593 + }, + { + "epoch": 0.5809111802081925, + "grad_norm": 0.3868730366230011, + "learning_rate": 1.1920495152715856e-05, + "loss": 0.0866241455078125, + "step": 8594 + }, + { + "epoch": 0.5809787751791267, + "grad_norm": 1.0719691514968872, + "learning_rate": 1.1917265980297238e-05, + "loss": 0.190093994140625, + "step": 8595 + }, + { + "epoch": 0.5810463701500609, + "grad_norm": 1.0058872699737549, + "learning_rate": 1.1914036957040462e-05, + "loss": 0.142669677734375, + "step": 8596 + }, + { + "epoch": 0.581113965120995, + "grad_norm": 0.2015431970357895, + "learning_rate": 1.1910808083101775e-05, + "loss": 0.026691436767578125, + "step": 8597 + }, + { + "epoch": 0.5811815600919291, + "grad_norm": 0.616239070892334, + "learning_rate": 1.1907579358637399e-05, + "loss": 0.1101837158203125, + "step": 8598 + }, + { + "epoch": 0.5812491550628633, + "grad_norm": 0.34749865531921387, + "learning_rate": 1.1904350783803568e-05, + "loss": 0.05167388916015625, + "step": 8599 + }, + { + "epoch": 0.5813167500337975, + "grad_norm": 0.7456692457199097, + "learning_rate": 1.1901122358756494e-05, + "loss": 0.091278076171875, + "step": 8600 + }, + { + "epoch": 0.5813843450047317, + "grad_norm": 0.7433719635009766, + "learning_rate": 1.1897894083652398e-05, + "loss": 0.1316070556640625, + "step": 8601 + }, + { + "epoch": 0.5814519399756658, + "grad_norm": 0.2663954794406891, + "learning_rate": 1.1894665958647476e-05, + "loss": 0.023693084716796875, + "step": 8602 + }, + { + "epoch": 0.5815195349466, + "grad_norm": 0.9412418603897095, + "learning_rate": 1.1891437983897922e-05, + "loss": 0.13433074951171875, + "step": 8603 + }, + { + "epoch": 0.5815871299175341, + "grad_norm": 0.21448205411434174, + "learning_rate": 1.1888210159559938e-05, + "loss": 0.02860260009765625, + "step": 8604 + }, + { + "epoch": 0.5816547248884683, + "grad_norm": 0.23208117485046387, + "learning_rate": 1.1884982485789692e-05, + "loss": 0.0385589599609375, + "step": 8605 + }, + { + "epoch": 0.5817223198594025, + "grad_norm": 1.6340177059173584, + "learning_rate": 1.188175496274337e-05, + "loss": 0.2313232421875, + "step": 8606 + }, + { + "epoch": 0.5817899148303366, + "grad_norm": 0.9348514676094055, + "learning_rate": 1.1878527590577131e-05, + "loss": 0.155548095703125, + "step": 8607 + }, + { + "epoch": 0.5818575098012708, + "grad_norm": 0.6595529913902283, + "learning_rate": 1.1875300369447141e-05, + "loss": 0.116241455078125, + "step": 8608 + }, + { + "epoch": 0.5819251047722049, + "grad_norm": 0.3162011206150055, + "learning_rate": 1.1872073299509548e-05, + "loss": 0.04184722900390625, + "step": 8609 + }, + { + "epoch": 0.5819926997431392, + "grad_norm": 0.4723498821258545, + "learning_rate": 1.186884638092051e-05, + "loss": 0.07061767578125, + "step": 8610 + }, + { + "epoch": 0.5820602947140733, + "grad_norm": 1.3063455820083618, + "learning_rate": 1.1865619613836148e-05, + "loss": 0.17273712158203125, + "step": 8611 + }, + { + "epoch": 0.5821278896850074, + "grad_norm": 0.21297375857830048, + "learning_rate": 1.1862392998412605e-05, + "loss": 0.021968841552734375, + "step": 8612 + }, + { + "epoch": 0.5821954846559416, + "grad_norm": 1.0102170705795288, + "learning_rate": 1.1859166534806005e-05, + "loss": 0.18701171875, + "step": 8613 + }, + { + "epoch": 0.5822630796268757, + "grad_norm": 1.152959942817688, + "learning_rate": 1.1855940223172454e-05, + "loss": 0.171295166015625, + "step": 8614 + }, + { + "epoch": 0.58233067459781, + "grad_norm": 1.2444326877593994, + "learning_rate": 1.1852714063668073e-05, + "loss": 0.15970611572265625, + "step": 8615 + }, + { + "epoch": 0.5823982695687441, + "grad_norm": 0.7936856746673584, + "learning_rate": 1.1849488056448952e-05, + "loss": 0.119384765625, + "step": 8616 + }, + { + "epoch": 0.5824658645396783, + "grad_norm": 0.2567548155784607, + "learning_rate": 1.1846262201671199e-05, + "loss": 0.044155120849609375, + "step": 8617 + }, + { + "epoch": 0.5825334595106124, + "grad_norm": 0.23927143216133118, + "learning_rate": 1.1843036499490887e-05, + "loss": 0.04400634765625, + "step": 8618 + }, + { + "epoch": 0.5826010544815465, + "grad_norm": 1.0116455554962158, + "learning_rate": 1.183981095006411e-05, + "loss": 0.169586181640625, + "step": 8619 + }, + { + "epoch": 0.5826686494524808, + "grad_norm": 1.046712875366211, + "learning_rate": 1.183658555354692e-05, + "loss": 0.197357177734375, + "step": 8620 + }, + { + "epoch": 0.5827362444234149, + "grad_norm": 0.772610068321228, + "learning_rate": 1.1833360310095406e-05, + "loss": 0.18438720703125, + "step": 8621 + }, + { + "epoch": 0.5828038393943491, + "grad_norm": 0.8396481275558472, + "learning_rate": 1.1830135219865605e-05, + "loss": 0.12659835815429688, + "step": 8622 + }, + { + "epoch": 0.5828714343652832, + "grad_norm": 1.2977161407470703, + "learning_rate": 1.1826910283013583e-05, + "loss": 0.159942626953125, + "step": 8623 + }, + { + "epoch": 0.5829390293362174, + "grad_norm": 0.9674757122993469, + "learning_rate": 1.1823685499695368e-05, + "loss": 0.1429901123046875, + "step": 8624 + }, + { + "epoch": 0.5830066243071516, + "grad_norm": 0.29122647643089294, + "learning_rate": 1.1820460870067006e-05, + "loss": 0.0649871826171875, + "step": 8625 + }, + { + "epoch": 0.5830742192780857, + "grad_norm": 0.5227072834968567, + "learning_rate": 1.1817236394284522e-05, + "loss": 0.07788848876953125, + "step": 8626 + }, + { + "epoch": 0.5831418142490199, + "grad_norm": 0.7214226126670837, + "learning_rate": 1.181401207250393e-05, + "loss": 0.12188720703125, + "step": 8627 + }, + { + "epoch": 0.583209409219954, + "grad_norm": 0.654706597328186, + "learning_rate": 1.1810787904881249e-05, + "loss": 0.11272430419921875, + "step": 8628 + }, + { + "epoch": 0.5832770041908882, + "grad_norm": 0.258196622133255, + "learning_rate": 1.180756389157248e-05, + "loss": 0.045989990234375, + "step": 8629 + }, + { + "epoch": 0.5833445991618224, + "grad_norm": 0.3563869297504425, + "learning_rate": 1.180434003273363e-05, + "loss": 0.0509033203125, + "step": 8630 + }, + { + "epoch": 0.5834121941327565, + "grad_norm": 0.2863663136959076, + "learning_rate": 1.1801116328520677e-05, + "loss": 0.06133270263671875, + "step": 8631 + }, + { + "epoch": 0.5834797891036907, + "grad_norm": 0.3733665347099304, + "learning_rate": 1.1797892779089611e-05, + "loss": 0.06261444091796875, + "step": 8632 + }, + { + "epoch": 0.5835473840746248, + "grad_norm": 0.6251530647277832, + "learning_rate": 1.1794669384596405e-05, + "loss": 0.07413101196289062, + "step": 8633 + }, + { + "epoch": 0.583614979045559, + "grad_norm": 0.2400670200586319, + "learning_rate": 1.1791446145197034e-05, + "loss": 0.04400062561035156, + "step": 8634 + }, + { + "epoch": 0.5836825740164931, + "grad_norm": 0.4617648124694824, + "learning_rate": 1.1788223061047445e-05, + "loss": 0.07152557373046875, + "step": 8635 + }, + { + "epoch": 0.5837501689874274, + "grad_norm": 0.32431313395500183, + "learning_rate": 1.1785000132303602e-05, + "loss": 0.06760787963867188, + "step": 8636 + }, + { + "epoch": 0.5838177639583615, + "grad_norm": 0.27457115054130554, + "learning_rate": 1.1781777359121446e-05, + "loss": 0.060009002685546875, + "step": 8637 + }, + { + "epoch": 0.5838853589292956, + "grad_norm": 0.2412004917860031, + "learning_rate": 1.1778554741656912e-05, + "loss": 0.03661155700683594, + "step": 8638 + }, + { + "epoch": 0.5839529539002298, + "grad_norm": 0.6953662633895874, + "learning_rate": 1.1775332280065941e-05, + "loss": 0.11528778076171875, + "step": 8639 + }, + { + "epoch": 0.584020548871164, + "grad_norm": 0.352858304977417, + "learning_rate": 1.1772109974504441e-05, + "loss": 0.05245208740234375, + "step": 8640 + }, + { + "epoch": 0.5840881438420982, + "grad_norm": 0.4492652416229248, + "learning_rate": 1.1768887825128338e-05, + "loss": 0.104278564453125, + "step": 8641 + }, + { + "epoch": 0.5841557388130323, + "grad_norm": 0.40868493914604187, + "learning_rate": 1.1765665832093531e-05, + "loss": 0.065826416015625, + "step": 8642 + }, + { + "epoch": 0.5842233337839665, + "grad_norm": 0.3314805328845978, + "learning_rate": 1.1762443995555935e-05, + "loss": 0.04691314697265625, + "step": 8643 + }, + { + "epoch": 0.5842909287549006, + "grad_norm": 0.544348955154419, + "learning_rate": 1.1759222315671423e-05, + "loss": 0.11017990112304688, + "step": 8644 + }, + { + "epoch": 0.5843585237258347, + "grad_norm": 1.0332844257354736, + "learning_rate": 1.1756000792595894e-05, + "loss": 0.14609527587890625, + "step": 8645 + }, + { + "epoch": 0.584426118696769, + "grad_norm": 0.6582888960838318, + "learning_rate": 1.1752779426485219e-05, + "loss": 0.103485107421875, + "step": 8646 + }, + { + "epoch": 0.5844937136677031, + "grad_norm": 0.37230220437049866, + "learning_rate": 1.1749558217495273e-05, + "loss": 0.0857696533203125, + "step": 8647 + }, + { + "epoch": 0.5845613086386373, + "grad_norm": 1.0459517240524292, + "learning_rate": 1.1746337165781913e-05, + "loss": 0.12149810791015625, + "step": 8648 + }, + { + "epoch": 0.5846289036095714, + "grad_norm": 1.0629760026931763, + "learning_rate": 1.1743116271500993e-05, + "loss": 0.1964111328125, + "step": 8649 + }, + { + "epoch": 0.5846964985805057, + "grad_norm": 0.3725661039352417, + "learning_rate": 1.1739895534808367e-05, + "loss": 0.0583038330078125, + "step": 8650 + }, + { + "epoch": 0.5847640935514398, + "grad_norm": 0.7152037620544434, + "learning_rate": 1.1736674955859865e-05, + "loss": 0.1343231201171875, + "step": 8651 + }, + { + "epoch": 0.5848316885223739, + "grad_norm": 0.5853203535079956, + "learning_rate": 1.1733454534811329e-05, + "loss": 0.13250732421875, + "step": 8652 + }, + { + "epoch": 0.5848992834933081, + "grad_norm": 0.8514373898506165, + "learning_rate": 1.1730234271818568e-05, + "loss": 0.09872817993164062, + "step": 8653 + }, + { + "epoch": 0.5849668784642422, + "grad_norm": 0.356289803981781, + "learning_rate": 1.1727014167037413e-05, + "loss": 0.06992340087890625, + "step": 8654 + }, + { + "epoch": 0.5850344734351764, + "grad_norm": 0.6087319850921631, + "learning_rate": 1.1723794220623663e-05, + "loss": 0.06509590148925781, + "step": 8655 + }, + { + "epoch": 0.5851020684061106, + "grad_norm": 0.6758665442466736, + "learning_rate": 1.1720574432733129e-05, + "loss": 0.13042449951171875, + "step": 8656 + }, + { + "epoch": 0.5851696633770448, + "grad_norm": 0.2340799868106842, + "learning_rate": 1.1717354803521593e-05, + "loss": 0.040058135986328125, + "step": 8657 + }, + { + "epoch": 0.5852372583479789, + "grad_norm": 0.4711850881576538, + "learning_rate": 1.1714135333144849e-05, + "loss": 0.083526611328125, + "step": 8658 + }, + { + "epoch": 0.585304853318913, + "grad_norm": 1.1203852891921997, + "learning_rate": 1.1710916021758671e-05, + "loss": 0.17120361328125, + "step": 8659 + }, + { + "epoch": 0.5853724482898472, + "grad_norm": 1.5232524871826172, + "learning_rate": 1.1707696869518833e-05, + "loss": 0.24871826171875, + "step": 8660 + }, + { + "epoch": 0.5854400432607814, + "grad_norm": 0.48472073674201965, + "learning_rate": 1.1704477876581095e-05, + "loss": 0.07486724853515625, + "step": 8661 + }, + { + "epoch": 0.5855076382317156, + "grad_norm": 0.29999661445617676, + "learning_rate": 1.1701259043101208e-05, + "loss": 0.06389617919921875, + "step": 8662 + }, + { + "epoch": 0.5855752332026497, + "grad_norm": 0.6401669383049011, + "learning_rate": 1.169804036923493e-05, + "loss": 0.12896728515625, + "step": 8663 + }, + { + "epoch": 0.5856428281735839, + "grad_norm": 0.9279093742370605, + "learning_rate": 1.1694821855137985e-05, + "loss": 0.12915802001953125, + "step": 8664 + }, + { + "epoch": 0.585710423144518, + "grad_norm": 0.33447450399398804, + "learning_rate": 1.169160350096612e-05, + "loss": 0.054566383361816406, + "step": 8665 + }, + { + "epoch": 0.5857780181154522, + "grad_norm": 0.3870466947555542, + "learning_rate": 1.1688385306875045e-05, + "loss": 0.07666778564453125, + "step": 8666 + }, + { + "epoch": 0.5858456130863864, + "grad_norm": 0.9999167323112488, + "learning_rate": 1.1685167273020492e-05, + "loss": 0.17303466796875, + "step": 8667 + }, + { + "epoch": 0.5859132080573205, + "grad_norm": 0.40957894921302795, + "learning_rate": 1.1681949399558155e-05, + "loss": 0.0626068115234375, + "step": 8668 + }, + { + "epoch": 0.5859808030282547, + "grad_norm": 0.252878874540329, + "learning_rate": 1.1678731686643747e-05, + "loss": 0.049556732177734375, + "step": 8669 + }, + { + "epoch": 0.5860483979991888, + "grad_norm": 0.32346078753471375, + "learning_rate": 1.1675514134432948e-05, + "loss": 0.0618438720703125, + "step": 8670 + }, + { + "epoch": 0.5861159929701231, + "grad_norm": 0.27781108021736145, + "learning_rate": 1.1672296743081453e-05, + "loss": 0.04516029357910156, + "step": 8671 + }, + { + "epoch": 0.5861835879410572, + "grad_norm": 1.3452236652374268, + "learning_rate": 1.166907951274494e-05, + "loss": 0.1886444091796875, + "step": 8672 + }, + { + "epoch": 0.5862511829119913, + "grad_norm": 1.2503914833068848, + "learning_rate": 1.1665862443579068e-05, + "loss": 0.21807861328125, + "step": 8673 + }, + { + "epoch": 0.5863187778829255, + "grad_norm": 0.2165316939353943, + "learning_rate": 1.166264553573951e-05, + "loss": 0.025743484497070312, + "step": 8674 + }, + { + "epoch": 0.5863863728538596, + "grad_norm": 0.5915499329566956, + "learning_rate": 1.1659428789381912e-05, + "loss": 0.12115859985351562, + "step": 8675 + }, + { + "epoch": 0.5864539678247939, + "grad_norm": 0.4125248193740845, + "learning_rate": 1.165621220466193e-05, + "loss": 0.07862091064453125, + "step": 8676 + }, + { + "epoch": 0.586521562795728, + "grad_norm": 0.43594273924827576, + "learning_rate": 1.165299578173519e-05, + "loss": 0.0948486328125, + "step": 8677 + }, + { + "epoch": 0.5865891577666622, + "grad_norm": 1.1920692920684814, + "learning_rate": 1.1649779520757332e-05, + "loss": 0.18426513671875, + "step": 8678 + }, + { + "epoch": 0.5866567527375963, + "grad_norm": 0.32075655460357666, + "learning_rate": 1.1646563421883974e-05, + "loss": 0.04836273193359375, + "step": 8679 + }, + { + "epoch": 0.5867243477085304, + "grad_norm": 0.6618157625198364, + "learning_rate": 1.1643347485270736e-05, + "loss": 0.10634613037109375, + "step": 8680 + }, + { + "epoch": 0.5867919426794647, + "grad_norm": 0.2612909972667694, + "learning_rate": 1.1640131711073218e-05, + "loss": 0.03444671630859375, + "step": 8681 + }, + { + "epoch": 0.5868595376503988, + "grad_norm": 0.3523455858230591, + "learning_rate": 1.1636916099447025e-05, + "loss": 0.0614776611328125, + "step": 8682 + }, + { + "epoch": 0.586927132621333, + "grad_norm": 0.956928551197052, + "learning_rate": 1.1633700650547746e-05, + "loss": 0.129669189453125, + "step": 8683 + }, + { + "epoch": 0.5869947275922671, + "grad_norm": 0.963706910610199, + "learning_rate": 1.1630485364530961e-05, + "loss": 0.177490234375, + "step": 8684 + }, + { + "epoch": 0.5870623225632013, + "grad_norm": 0.5491752624511719, + "learning_rate": 1.1627270241552254e-05, + "loss": 0.08154296875, + "step": 8685 + }, + { + "epoch": 0.5871299175341355, + "grad_norm": 0.2808985412120819, + "learning_rate": 1.1624055281767183e-05, + "loss": 0.040447235107421875, + "step": 8686 + }, + { + "epoch": 0.5871975125050696, + "grad_norm": 1.0977861881256104, + "learning_rate": 1.1620840485331314e-05, + "loss": 0.196075439453125, + "step": 8687 + }, + { + "epoch": 0.5872651074760038, + "grad_norm": 0.46353328227996826, + "learning_rate": 1.1617625852400194e-05, + "loss": 0.05568504333496094, + "step": 8688 + }, + { + "epoch": 0.5873327024469379, + "grad_norm": 0.7141475677490234, + "learning_rate": 1.1614411383129377e-05, + "loss": 0.1514434814453125, + "step": 8689 + }, + { + "epoch": 0.5874002974178721, + "grad_norm": 0.507291853427887, + "learning_rate": 1.1611197077674383e-05, + "loss": 0.0992431640625, + "step": 8690 + }, + { + "epoch": 0.5874678923888063, + "grad_norm": 0.807812511920929, + "learning_rate": 1.1607982936190755e-05, + "loss": 0.181121826171875, + "step": 8691 + }, + { + "epoch": 0.5875354873597405, + "grad_norm": 1.484163761138916, + "learning_rate": 1.1604768958834005e-05, + "loss": 0.195281982421875, + "step": 8692 + }, + { + "epoch": 0.5876030823306746, + "grad_norm": 0.9579480886459351, + "learning_rate": 1.1601555145759652e-05, + "loss": 0.1295166015625, + "step": 8693 + }, + { + "epoch": 0.5876706773016087, + "grad_norm": 0.43379735946655273, + "learning_rate": 1.159834149712319e-05, + "loss": 0.08342742919921875, + "step": 8694 + }, + { + "epoch": 0.5877382722725429, + "grad_norm": 0.7450853586196899, + "learning_rate": 1.1595128013080123e-05, + "loss": 0.12816619873046875, + "step": 8695 + }, + { + "epoch": 0.587805867243477, + "grad_norm": 0.7423731684684753, + "learning_rate": 1.1591914693785944e-05, + "loss": 0.12164306640625, + "step": 8696 + }, + { + "epoch": 0.5878734622144113, + "grad_norm": 0.657917857170105, + "learning_rate": 1.1588701539396116e-05, + "loss": 0.1746673583984375, + "step": 8697 + }, + { + "epoch": 0.5879410571853454, + "grad_norm": 0.3802858591079712, + "learning_rate": 1.158548855006613e-05, + "loss": 0.051937103271484375, + "step": 8698 + }, + { + "epoch": 0.5880086521562796, + "grad_norm": 0.8764135837554932, + "learning_rate": 1.1582275725951438e-05, + "loss": 0.176666259765625, + "step": 8699 + }, + { + "epoch": 0.5880762471272137, + "grad_norm": 0.6139892935752869, + "learning_rate": 1.1579063067207502e-05, + "loss": 0.091705322265625, + "step": 8700 + }, + { + "epoch": 0.5881438420981479, + "grad_norm": 0.5468533039093018, + "learning_rate": 1.1575850573989768e-05, + "loss": 0.10638046264648438, + "step": 8701 + }, + { + "epoch": 0.5882114370690821, + "grad_norm": 0.35837945342063904, + "learning_rate": 1.1572638246453683e-05, + "loss": 0.05406951904296875, + "step": 8702 + }, + { + "epoch": 0.5882790320400162, + "grad_norm": 0.49726080894470215, + "learning_rate": 1.1569426084754668e-05, + "loss": 0.11445236206054688, + "step": 8703 + }, + { + "epoch": 0.5883466270109504, + "grad_norm": 0.3202618360519409, + "learning_rate": 1.1566214089048156e-05, + "loss": 0.08016777038574219, + "step": 8704 + }, + { + "epoch": 0.5884142219818845, + "grad_norm": 0.3419942259788513, + "learning_rate": 1.156300225948956e-05, + "loss": 0.0679168701171875, + "step": 8705 + }, + { + "epoch": 0.5884818169528188, + "grad_norm": 0.5217346549034119, + "learning_rate": 1.1559790596234294e-05, + "loss": 0.152374267578125, + "step": 8706 + }, + { + "epoch": 0.5885494119237529, + "grad_norm": 0.43999916315078735, + "learning_rate": 1.155657909943775e-05, + "loss": 0.0816497802734375, + "step": 8707 + }, + { + "epoch": 0.588617006894687, + "grad_norm": 0.6483472585678101, + "learning_rate": 1.1553367769255319e-05, + "loss": 0.09523773193359375, + "step": 8708 + }, + { + "epoch": 0.5886846018656212, + "grad_norm": 0.7804378867149353, + "learning_rate": 1.1550156605842397e-05, + "loss": 0.10160446166992188, + "step": 8709 + }, + { + "epoch": 0.5887521968365553, + "grad_norm": 1.2145999670028687, + "learning_rate": 1.1546945609354348e-05, + "loss": 0.17645263671875, + "step": 8710 + }, + { + "epoch": 0.5888197918074896, + "grad_norm": 0.40048685669898987, + "learning_rate": 1.1543734779946551e-05, + "loss": 0.0674896240234375, + "step": 8711 + }, + { + "epoch": 0.5888873867784237, + "grad_norm": 0.45446011424064636, + "learning_rate": 1.1540524117774352e-05, + "loss": 0.10235595703125, + "step": 8712 + }, + { + "epoch": 0.5889549817493579, + "grad_norm": 1.320854663848877, + "learning_rate": 1.1537313622993115e-05, + "loss": 0.21173858642578125, + "step": 8713 + }, + { + "epoch": 0.589022576720292, + "grad_norm": 0.9009501338005066, + "learning_rate": 1.1534103295758174e-05, + "loss": 0.12017822265625, + "step": 8714 + }, + { + "epoch": 0.5890901716912261, + "grad_norm": 0.7884964942932129, + "learning_rate": 1.1530893136224878e-05, + "loss": 0.140380859375, + "step": 8715 + }, + { + "epoch": 0.5891577666621604, + "grad_norm": 0.3988015353679657, + "learning_rate": 1.1527683144548537e-05, + "loss": 0.065521240234375, + "step": 8716 + }, + { + "epoch": 0.5892253616330945, + "grad_norm": 0.7668401598930359, + "learning_rate": 1.1524473320884487e-05, + "loss": 0.13910675048828125, + "step": 8717 + }, + { + "epoch": 0.5892929566040287, + "grad_norm": 1.1118667125701904, + "learning_rate": 1.152126366538803e-05, + "loss": 0.18328857421875, + "step": 8718 + }, + { + "epoch": 0.5893605515749628, + "grad_norm": 0.7662270665168762, + "learning_rate": 1.1518054178214468e-05, + "loss": 0.1768341064453125, + "step": 8719 + }, + { + "epoch": 0.589428146545897, + "grad_norm": 0.188451886177063, + "learning_rate": 1.1514844859519103e-05, + "loss": 0.02191448211669922, + "step": 8720 + }, + { + "epoch": 0.5894957415168312, + "grad_norm": 0.6363850235939026, + "learning_rate": 1.1511635709457211e-05, + "loss": 0.0943603515625, + "step": 8721 + }, + { + "epoch": 0.5895633364877653, + "grad_norm": 0.5616740584373474, + "learning_rate": 1.1508426728184085e-05, + "loss": 0.103302001953125, + "step": 8722 + }, + { + "epoch": 0.5896309314586995, + "grad_norm": 0.8014079928398132, + "learning_rate": 1.1505217915854983e-05, + "loss": 0.166412353515625, + "step": 8723 + }, + { + "epoch": 0.5896985264296336, + "grad_norm": 2.3329646587371826, + "learning_rate": 1.1502009272625173e-05, + "loss": 0.25701904296875, + "step": 8724 + }, + { + "epoch": 0.5897661214005678, + "grad_norm": 0.5780616998672485, + "learning_rate": 1.1498800798649906e-05, + "loss": 0.11402511596679688, + "step": 8725 + }, + { + "epoch": 0.589833716371502, + "grad_norm": 0.38926443457603455, + "learning_rate": 1.1495592494084437e-05, + "loss": 0.06646347045898438, + "step": 8726 + }, + { + "epoch": 0.5899013113424362, + "grad_norm": 0.5442824363708496, + "learning_rate": 1.1492384359083988e-05, + "loss": 0.086334228515625, + "step": 8727 + }, + { + "epoch": 0.5899689063133703, + "grad_norm": 1.3359005451202393, + "learning_rate": 1.1489176393803807e-05, + "loss": 0.20721435546875, + "step": 8728 + }, + { + "epoch": 0.5900365012843044, + "grad_norm": 0.4803200364112854, + "learning_rate": 1.1485968598399098e-05, + "loss": 0.0891571044921875, + "step": 8729 + }, + { + "epoch": 0.5901040962552386, + "grad_norm": 0.28654879331588745, + "learning_rate": 1.1482760973025085e-05, + "loss": 0.0536956787109375, + "step": 8730 + }, + { + "epoch": 0.5901716912261727, + "grad_norm": 0.438502699136734, + "learning_rate": 1.1479553517836973e-05, + "loss": 0.0823822021484375, + "step": 8731 + }, + { + "epoch": 0.590239286197107, + "grad_norm": 0.5209841728210449, + "learning_rate": 1.1476346232989947e-05, + "loss": 0.08020782470703125, + "step": 8732 + }, + { + "epoch": 0.5903068811680411, + "grad_norm": 0.45463600754737854, + "learning_rate": 1.1473139118639208e-05, + "loss": 0.080657958984375, + "step": 8733 + }, + { + "epoch": 0.5903744761389753, + "grad_norm": 0.2506703734397888, + "learning_rate": 1.146993217493993e-05, + "loss": 0.03681182861328125, + "step": 8734 + }, + { + "epoch": 0.5904420711099094, + "grad_norm": 0.9088581204414368, + "learning_rate": 1.146672540204729e-05, + "loss": 0.1224212646484375, + "step": 8735 + }, + { + "epoch": 0.5905096660808435, + "grad_norm": 0.44022276997566223, + "learning_rate": 1.1463518800116443e-05, + "loss": 0.10698699951171875, + "step": 8736 + }, + { + "epoch": 0.5905772610517778, + "grad_norm": 1.1694424152374268, + "learning_rate": 1.1460312369302554e-05, + "loss": 0.216400146484375, + "step": 8737 + }, + { + "epoch": 0.5906448560227119, + "grad_norm": 0.7747116684913635, + "learning_rate": 1.1457106109760763e-05, + "loss": 0.119049072265625, + "step": 8738 + }, + { + "epoch": 0.5907124509936461, + "grad_norm": 0.45350363850593567, + "learning_rate": 1.1453900021646216e-05, + "loss": 0.0957489013671875, + "step": 8739 + }, + { + "epoch": 0.5907800459645802, + "grad_norm": 0.6465596556663513, + "learning_rate": 1.1450694105114033e-05, + "loss": 0.12364959716796875, + "step": 8740 + }, + { + "epoch": 0.5908476409355145, + "grad_norm": 0.39962926506996155, + "learning_rate": 1.1447488360319346e-05, + "loss": 0.0980987548828125, + "step": 8741 + }, + { + "epoch": 0.5909152359064486, + "grad_norm": 0.27539315819740295, + "learning_rate": 1.1444282787417266e-05, + "loss": 0.049083709716796875, + "step": 8742 + }, + { + "epoch": 0.5909828308773827, + "grad_norm": 0.8457629084587097, + "learning_rate": 1.1441077386562894e-05, + "loss": 0.1557769775390625, + "step": 8743 + }, + { + "epoch": 0.5910504258483169, + "grad_norm": 0.23895163834095, + "learning_rate": 1.1437872157911335e-05, + "loss": 0.0442962646484375, + "step": 8744 + }, + { + "epoch": 0.591118020819251, + "grad_norm": 0.7584761381149292, + "learning_rate": 1.1434667101617669e-05, + "loss": 0.13901519775390625, + "step": 8745 + }, + { + "epoch": 0.5911856157901852, + "grad_norm": 1.1476383209228516, + "learning_rate": 1.1431462217836984e-05, + "loss": 0.208251953125, + "step": 8746 + }, + { + "epoch": 0.5912532107611194, + "grad_norm": 0.5359832048416138, + "learning_rate": 1.1428257506724345e-05, + "loss": 0.09037017822265625, + "step": 8747 + }, + { + "epoch": 0.5913208057320536, + "grad_norm": 0.2717360556125641, + "learning_rate": 1.1425052968434827e-05, + "loss": 0.048694610595703125, + "step": 8748 + }, + { + "epoch": 0.5913884007029877, + "grad_norm": 0.17466354370117188, + "learning_rate": 1.1421848603123472e-05, + "loss": 0.02945709228515625, + "step": 8749 + }, + { + "epoch": 0.5914559956739218, + "grad_norm": 0.7867473363876343, + "learning_rate": 1.1418644410945339e-05, + "loss": 0.12517547607421875, + "step": 8750 + }, + { + "epoch": 0.591523590644856, + "grad_norm": 0.5480712056159973, + "learning_rate": 1.1415440392055456e-05, + "loss": 0.0963592529296875, + "step": 8751 + }, + { + "epoch": 0.5915911856157902, + "grad_norm": 0.2677794396877289, + "learning_rate": 1.1412236546608868e-05, + "loss": 0.0240631103515625, + "step": 8752 + }, + { + "epoch": 0.5916587805867244, + "grad_norm": 0.48878994584083557, + "learning_rate": 1.1409032874760584e-05, + "loss": 0.0895233154296875, + "step": 8753 + }, + { + "epoch": 0.5917263755576585, + "grad_norm": 0.6922009587287903, + "learning_rate": 1.1405829376665617e-05, + "loss": 0.1529541015625, + "step": 8754 + }, + { + "epoch": 0.5917939705285927, + "grad_norm": 0.6852201223373413, + "learning_rate": 1.1402626052478983e-05, + "loss": 0.148529052734375, + "step": 8755 + }, + { + "epoch": 0.5918615654995268, + "grad_norm": 0.3059256970882416, + "learning_rate": 1.1399422902355667e-05, + "loss": 0.07114410400390625, + "step": 8756 + }, + { + "epoch": 0.591929160470461, + "grad_norm": 1.481431245803833, + "learning_rate": 1.1396219926450668e-05, + "loss": 0.178466796875, + "step": 8757 + }, + { + "epoch": 0.5919967554413952, + "grad_norm": 1.222131371498108, + "learning_rate": 1.1393017124918956e-05, + "loss": 0.211090087890625, + "step": 8758 + }, + { + "epoch": 0.5920643504123293, + "grad_norm": 0.35586684942245483, + "learning_rate": 1.1389814497915508e-05, + "loss": 0.06615829467773438, + "step": 8759 + }, + { + "epoch": 0.5921319453832635, + "grad_norm": 0.45236021280288696, + "learning_rate": 1.1386612045595285e-05, + "loss": 0.07763671875, + "step": 8760 + }, + { + "epoch": 0.5921995403541976, + "grad_norm": 0.7819872498512268, + "learning_rate": 1.1383409768113247e-05, + "loss": 0.14266395568847656, + "step": 8761 + }, + { + "epoch": 0.5922671353251318, + "grad_norm": 0.5921935439109802, + "learning_rate": 1.1380207665624332e-05, + "loss": 0.1251678466796875, + "step": 8762 + }, + { + "epoch": 0.592334730296066, + "grad_norm": 0.7033315896987915, + "learning_rate": 1.1377005738283483e-05, + "loss": 0.133697509765625, + "step": 8763 + }, + { + "epoch": 0.5924023252670001, + "grad_norm": 0.29524415731430054, + "learning_rate": 1.1373803986245624e-05, + "loss": 0.05478858947753906, + "step": 8764 + }, + { + "epoch": 0.5924699202379343, + "grad_norm": 0.5288033485412598, + "learning_rate": 1.1370602409665686e-05, + "loss": 0.10787010192871094, + "step": 8765 + }, + { + "epoch": 0.5925375152088684, + "grad_norm": 0.6563265323638916, + "learning_rate": 1.1367401008698571e-05, + "loss": 0.13531494140625, + "step": 8766 + }, + { + "epoch": 0.5926051101798027, + "grad_norm": 0.9061560034751892, + "learning_rate": 1.1364199783499185e-05, + "loss": 0.1229715347290039, + "step": 8767 + }, + { + "epoch": 0.5926727051507368, + "grad_norm": 0.879242479801178, + "learning_rate": 1.136099873422243e-05, + "loss": 0.174530029296875, + "step": 8768 + }, + { + "epoch": 0.5927403001216709, + "grad_norm": 0.22865109145641327, + "learning_rate": 1.1357797861023179e-05, + "loss": 0.02452564239501953, + "step": 8769 + }, + { + "epoch": 0.5928078950926051, + "grad_norm": 0.7363408207893372, + "learning_rate": 1.135459716405632e-05, + "loss": 0.146697998046875, + "step": 8770 + }, + { + "epoch": 0.5928754900635392, + "grad_norm": 0.5828422904014587, + "learning_rate": 1.1351396643476719e-05, + "loss": 0.1030731201171875, + "step": 8771 + }, + { + "epoch": 0.5929430850344735, + "grad_norm": 0.8649671673774719, + "learning_rate": 1.1348196299439247e-05, + "loss": 0.149871826171875, + "step": 8772 + }, + { + "epoch": 0.5930106800054076, + "grad_norm": 0.33585885167121887, + "learning_rate": 1.1344996132098738e-05, + "loss": 0.05846405029296875, + "step": 8773 + }, + { + "epoch": 0.5930782749763418, + "grad_norm": 0.7444250583648682, + "learning_rate": 1.1341796141610056e-05, + "loss": 0.11811256408691406, + "step": 8774 + }, + { + "epoch": 0.5931458699472759, + "grad_norm": 0.20909088850021362, + "learning_rate": 1.133859632812802e-05, + "loss": 0.03202056884765625, + "step": 8775 + }, + { + "epoch": 0.59321346491821, + "grad_norm": 0.31557416915893555, + "learning_rate": 1.1335396691807467e-05, + "loss": 0.064971923828125, + "step": 8776 + }, + { + "epoch": 0.5932810598891443, + "grad_norm": 1.398844599723816, + "learning_rate": 1.1332197232803214e-05, + "loss": 0.13641357421875, + "step": 8777 + }, + { + "epoch": 0.5933486548600784, + "grad_norm": 0.2569015622138977, + "learning_rate": 1.1328997951270063e-05, + "loss": 0.04959869384765625, + "step": 8778 + }, + { + "epoch": 0.5934162498310126, + "grad_norm": 0.4476928412914276, + "learning_rate": 1.1325798847362825e-05, + "loss": 0.08039474487304688, + "step": 8779 + }, + { + "epoch": 0.5934838448019467, + "grad_norm": 0.2587316334247589, + "learning_rate": 1.1322599921236284e-05, + "loss": 0.05544281005859375, + "step": 8780 + }, + { + "epoch": 0.5935514397728809, + "grad_norm": 0.4901208281517029, + "learning_rate": 1.1319401173045234e-05, + "loss": 0.07426071166992188, + "step": 8781 + }, + { + "epoch": 0.593619034743815, + "grad_norm": 0.4089511036872864, + "learning_rate": 1.131620260294444e-05, + "loss": 0.0800933837890625, + "step": 8782 + }, + { + "epoch": 0.5936866297147492, + "grad_norm": 0.960300862789154, + "learning_rate": 1.1313004211088675e-05, + "loss": 0.1643829345703125, + "step": 8783 + }, + { + "epoch": 0.5937542246856834, + "grad_norm": 0.8808115720748901, + "learning_rate": 1.1309805997632693e-05, + "loss": 0.1118316650390625, + "step": 8784 + }, + { + "epoch": 0.5938218196566175, + "grad_norm": 0.6309084296226501, + "learning_rate": 1.1306607962731253e-05, + "loss": 0.1019287109375, + "step": 8785 + }, + { + "epoch": 0.5938894146275517, + "grad_norm": 0.24719767272472382, + "learning_rate": 1.130341010653908e-05, + "loss": 0.0497283935546875, + "step": 8786 + }, + { + "epoch": 0.5939570095984859, + "grad_norm": 0.2190537303686142, + "learning_rate": 1.130021242921092e-05, + "loss": 0.03949165344238281, + "step": 8787 + }, + { + "epoch": 0.5940246045694201, + "grad_norm": 0.5991580486297607, + "learning_rate": 1.1297014930901493e-05, + "loss": 0.0764007568359375, + "step": 8788 + }, + { + "epoch": 0.5940921995403542, + "grad_norm": 0.2801409661769867, + "learning_rate": 1.129381761176551e-05, + "loss": 0.042236328125, + "step": 8789 + }, + { + "epoch": 0.5941597945112883, + "grad_norm": 0.759441614151001, + "learning_rate": 1.1290620471957683e-05, + "loss": 0.12043380737304688, + "step": 8790 + }, + { + "epoch": 0.5942273894822225, + "grad_norm": 0.8971043229103088, + "learning_rate": 1.1287423511632697e-05, + "loss": 0.15110301971435547, + "step": 8791 + }, + { + "epoch": 0.5942949844531566, + "grad_norm": 0.4040175676345825, + "learning_rate": 1.1284226730945256e-05, + "loss": 0.06116485595703125, + "step": 8792 + }, + { + "epoch": 0.5943625794240909, + "grad_norm": 1.182439923286438, + "learning_rate": 1.128103013005003e-05, + "loss": 0.194427490234375, + "step": 8793 + }, + { + "epoch": 0.594430174395025, + "grad_norm": 0.579251229763031, + "learning_rate": 1.1277833709101702e-05, + "loss": 0.1043548583984375, + "step": 8794 + }, + { + "epoch": 0.5944977693659592, + "grad_norm": 0.4196612536907196, + "learning_rate": 1.127463746825492e-05, + "loss": 0.0830230712890625, + "step": 8795 + }, + { + "epoch": 0.5945653643368933, + "grad_norm": 0.4090072810649872, + "learning_rate": 1.1271441407664346e-05, + "loss": 0.06456375122070312, + "step": 8796 + }, + { + "epoch": 0.5946329593078274, + "grad_norm": 1.6054543256759644, + "learning_rate": 1.1268245527484623e-05, + "loss": 0.16815185546875, + "step": 8797 + }, + { + "epoch": 0.5947005542787617, + "grad_norm": 0.7554404735565186, + "learning_rate": 1.1265049827870394e-05, + "loss": 0.113800048828125, + "step": 8798 + }, + { + "epoch": 0.5947681492496958, + "grad_norm": 0.4215681254863739, + "learning_rate": 1.126185430897628e-05, + "loss": 0.081634521484375, + "step": 8799 + }, + { + "epoch": 0.59483574422063, + "grad_norm": 0.2497863918542862, + "learning_rate": 1.1258658970956901e-05, + "loss": 0.0399627685546875, + "step": 8800 + }, + { + "epoch": 0.5949033391915641, + "grad_norm": 0.8920559883117676, + "learning_rate": 1.1255463813966871e-05, + "loss": 0.1719207763671875, + "step": 8801 + }, + { + "epoch": 0.5949709341624984, + "grad_norm": 1.08437979221344, + "learning_rate": 1.1252268838160783e-05, + "loss": 0.17916107177734375, + "step": 8802 + }, + { + "epoch": 0.5950385291334325, + "grad_norm": 0.5894260406494141, + "learning_rate": 1.1249074043693242e-05, + "loss": 0.1167144775390625, + "step": 8803 + }, + { + "epoch": 0.5951061241043666, + "grad_norm": 0.8110954761505127, + "learning_rate": 1.124587943071882e-05, + "loss": 0.11504554748535156, + "step": 8804 + }, + { + "epoch": 0.5951737190753008, + "grad_norm": 0.6065567135810852, + "learning_rate": 1.12426849993921e-05, + "loss": 0.09201431274414062, + "step": 8805 + }, + { + "epoch": 0.5952413140462349, + "grad_norm": 0.41453686356544495, + "learning_rate": 1.1239490749867644e-05, + "loss": 0.077239990234375, + "step": 8806 + }, + { + "epoch": 0.5953089090171692, + "grad_norm": 0.414018839597702, + "learning_rate": 1.1236296682300016e-05, + "loss": 0.0830078125, + "step": 8807 + }, + { + "epoch": 0.5953765039881033, + "grad_norm": 0.4606839418411255, + "learning_rate": 1.1233102796843756e-05, + "loss": 0.07525634765625, + "step": 8808 + }, + { + "epoch": 0.5954440989590375, + "grad_norm": 0.36045247316360474, + "learning_rate": 1.1229909093653413e-05, + "loss": 0.07299995422363281, + "step": 8809 + }, + { + "epoch": 0.5955116939299716, + "grad_norm": 0.8405773043632507, + "learning_rate": 1.122671557288351e-05, + "loss": 0.1422882080078125, + "step": 8810 + }, + { + "epoch": 0.5955792889009057, + "grad_norm": 0.2817588448524475, + "learning_rate": 1.1223522234688578e-05, + "loss": 0.06752777099609375, + "step": 8811 + }, + { + "epoch": 0.59564688387184, + "grad_norm": 0.6804250478744507, + "learning_rate": 1.1220329079223125e-05, + "loss": 0.158782958984375, + "step": 8812 + }, + { + "epoch": 0.5957144788427741, + "grad_norm": 0.3859122693538666, + "learning_rate": 1.1217136106641651e-05, + "loss": 0.0669403076171875, + "step": 8813 + }, + { + "epoch": 0.5957820738137083, + "grad_norm": 0.32459554076194763, + "learning_rate": 1.1213943317098666e-05, + "loss": 0.05552101135253906, + "step": 8814 + }, + { + "epoch": 0.5958496687846424, + "grad_norm": 0.17421455681324005, + "learning_rate": 1.1210750710748642e-05, + "loss": 0.028377532958984375, + "step": 8815 + }, + { + "epoch": 0.5959172637555766, + "grad_norm": 0.9790927767753601, + "learning_rate": 1.1207558287746068e-05, + "loss": 0.1297607421875, + "step": 8816 + }, + { + "epoch": 0.5959848587265107, + "grad_norm": 0.46447262167930603, + "learning_rate": 1.1204366048245404e-05, + "loss": 0.091766357421875, + "step": 8817 + }, + { + "epoch": 0.5960524536974449, + "grad_norm": 0.2946593463420868, + "learning_rate": 1.1201173992401123e-05, + "loss": 0.054351806640625, + "step": 8818 + }, + { + "epoch": 0.5961200486683791, + "grad_norm": 0.5912331342697144, + "learning_rate": 1.1197982120367662e-05, + "loss": 0.11385345458984375, + "step": 8819 + }, + { + "epoch": 0.5961876436393132, + "grad_norm": 0.666347086429596, + "learning_rate": 1.1194790432299474e-05, + "loss": 0.12530517578125, + "step": 8820 + }, + { + "epoch": 0.5962552386102474, + "grad_norm": 1.2971686124801636, + "learning_rate": 1.1191598928350988e-05, + "loss": 0.245147705078125, + "step": 8821 + }, + { + "epoch": 0.5963228335811815, + "grad_norm": 0.6882233023643494, + "learning_rate": 1.1188407608676628e-05, + "loss": 0.12494659423828125, + "step": 8822 + }, + { + "epoch": 0.5963904285521158, + "grad_norm": 0.2587801516056061, + "learning_rate": 1.1185216473430819e-05, + "loss": 0.03934478759765625, + "step": 8823 + }, + { + "epoch": 0.5964580235230499, + "grad_norm": 1.0850162506103516, + "learning_rate": 1.1182025522767953e-05, + "loss": 0.212890625, + "step": 8824 + }, + { + "epoch": 0.596525618493984, + "grad_norm": 0.702010989189148, + "learning_rate": 1.1178834756842441e-05, + "loss": 0.15021514892578125, + "step": 8825 + }, + { + "epoch": 0.5965932134649182, + "grad_norm": 0.8669838905334473, + "learning_rate": 1.117564417580866e-05, + "loss": 0.1296977996826172, + "step": 8826 + }, + { + "epoch": 0.5966608084358523, + "grad_norm": 0.2532128691673279, + "learning_rate": 1.1172453779821008e-05, + "loss": 0.052165985107421875, + "step": 8827 + }, + { + "epoch": 0.5967284034067866, + "grad_norm": 0.7761054039001465, + "learning_rate": 1.1169263569033836e-05, + "loss": 0.1315460205078125, + "step": 8828 + }, + { + "epoch": 0.5967959983777207, + "grad_norm": 0.554452121257782, + "learning_rate": 1.116607354360152e-05, + "loss": 0.139892578125, + "step": 8829 + }, + { + "epoch": 0.5968635933486549, + "grad_norm": 0.842518150806427, + "learning_rate": 1.1162883703678404e-05, + "loss": 0.1484222412109375, + "step": 8830 + }, + { + "epoch": 0.596931188319589, + "grad_norm": 0.3960668742656708, + "learning_rate": 1.1159694049418848e-05, + "loss": 0.0903778076171875, + "step": 8831 + }, + { + "epoch": 0.5969987832905231, + "grad_norm": 0.2982543408870697, + "learning_rate": 1.1156504580977164e-05, + "loss": 0.06288337707519531, + "step": 8832 + }, + { + "epoch": 0.5970663782614574, + "grad_norm": 0.8139584064483643, + "learning_rate": 1.1153315298507697e-05, + "loss": 0.158477783203125, + "step": 8833 + }, + { + "epoch": 0.5971339732323915, + "grad_norm": 0.851645290851593, + "learning_rate": 1.1150126202164757e-05, + "loss": 0.175048828125, + "step": 8834 + }, + { + "epoch": 0.5972015682033257, + "grad_norm": 0.8140777945518494, + "learning_rate": 1.1146937292102657e-05, + "loss": 0.13315200805664062, + "step": 8835 + }, + { + "epoch": 0.5972691631742598, + "grad_norm": 0.6296548843383789, + "learning_rate": 1.1143748568475694e-05, + "loss": 0.1287841796875, + "step": 8836 + }, + { + "epoch": 0.597336758145194, + "grad_norm": 0.6005533337593079, + "learning_rate": 1.114056003143815e-05, + "loss": 0.07067108154296875, + "step": 8837 + }, + { + "epoch": 0.5974043531161282, + "grad_norm": 0.7229066491127014, + "learning_rate": 1.1137371681144316e-05, + "loss": 0.1224365234375, + "step": 8838 + }, + { + "epoch": 0.5974719480870623, + "grad_norm": 1.1087099313735962, + "learning_rate": 1.113418351774846e-05, + "loss": 0.1499481201171875, + "step": 8839 + }, + { + "epoch": 0.5975395430579965, + "grad_norm": 0.3155130445957184, + "learning_rate": 1.1130995541404851e-05, + "loss": 0.05279541015625, + "step": 8840 + }, + { + "epoch": 0.5976071380289306, + "grad_norm": 0.45579561591148376, + "learning_rate": 1.1127807752267734e-05, + "loss": 0.100433349609375, + "step": 8841 + }, + { + "epoch": 0.5976747329998648, + "grad_norm": 0.8498456478118896, + "learning_rate": 1.1124620150491361e-05, + "loss": 0.195770263671875, + "step": 8842 + }, + { + "epoch": 0.597742327970799, + "grad_norm": 0.4835982620716095, + "learning_rate": 1.1121432736229964e-05, + "loss": 0.08702850341796875, + "step": 8843 + }, + { + "epoch": 0.5978099229417332, + "grad_norm": 1.3629133701324463, + "learning_rate": 1.1118245509637777e-05, + "loss": 0.1582183837890625, + "step": 8844 + }, + { + "epoch": 0.5978775179126673, + "grad_norm": 0.6308310627937317, + "learning_rate": 1.1115058470869005e-05, + "loss": 0.1207275390625, + "step": 8845 + }, + { + "epoch": 0.5979451128836014, + "grad_norm": 0.6576707363128662, + "learning_rate": 1.1111871620077869e-05, + "loss": 0.1088714599609375, + "step": 8846 + }, + { + "epoch": 0.5980127078545356, + "grad_norm": 0.2096441239118576, + "learning_rate": 1.1108684957418568e-05, + "loss": 0.02648162841796875, + "step": 8847 + }, + { + "epoch": 0.5980803028254698, + "grad_norm": 0.403861939907074, + "learning_rate": 1.1105498483045282e-05, + "loss": 0.058349609375, + "step": 8848 + }, + { + "epoch": 0.598147897796404, + "grad_norm": 0.8891184329986572, + "learning_rate": 1.1102312197112205e-05, + "loss": 0.16119384765625, + "step": 8849 + }, + { + "epoch": 0.5982154927673381, + "grad_norm": 0.6466841101646423, + "learning_rate": 1.1099126099773496e-05, + "loss": 0.10528564453125, + "step": 8850 + }, + { + "epoch": 0.5982830877382723, + "grad_norm": 0.16723422706127167, + "learning_rate": 1.1095940191183332e-05, + "loss": 0.02295684814453125, + "step": 8851 + }, + { + "epoch": 0.5983506827092064, + "grad_norm": 0.7306435108184814, + "learning_rate": 1.1092754471495853e-05, + "loss": 0.1016082763671875, + "step": 8852 + }, + { + "epoch": 0.5984182776801406, + "grad_norm": 0.6838451027870178, + "learning_rate": 1.1089568940865221e-05, + "loss": 0.1577301025390625, + "step": 8853 + }, + { + "epoch": 0.5984858726510748, + "grad_norm": 0.7699760794639587, + "learning_rate": 1.1086383599445555e-05, + "loss": 0.11778640747070312, + "step": 8854 + }, + { + "epoch": 0.5985534676220089, + "grad_norm": 1.8700913190841675, + "learning_rate": 1.1083198447390992e-05, + "loss": 0.253448486328125, + "step": 8855 + }, + { + "epoch": 0.5986210625929431, + "grad_norm": 0.7487857937812805, + "learning_rate": 1.1080013484855645e-05, + "loss": 0.154449462890625, + "step": 8856 + }, + { + "epoch": 0.5986886575638772, + "grad_norm": 0.46444353461265564, + "learning_rate": 1.1076828711993627e-05, + "loss": 0.0914154052734375, + "step": 8857 + }, + { + "epoch": 0.5987562525348115, + "grad_norm": 1.4415751695632935, + "learning_rate": 1.1073644128959032e-05, + "loss": 0.256866455078125, + "step": 8858 + }, + { + "epoch": 0.5988238475057456, + "grad_norm": 0.5466572642326355, + "learning_rate": 1.107045973590595e-05, + "loss": 0.10736846923828125, + "step": 8859 + }, + { + "epoch": 0.5988914424766797, + "grad_norm": 0.7063121199607849, + "learning_rate": 1.1067275532988468e-05, + "loss": 0.173065185546875, + "step": 8860 + }, + { + "epoch": 0.5989590374476139, + "grad_norm": 0.46905517578125, + "learning_rate": 1.1064091520360647e-05, + "loss": 0.067657470703125, + "step": 8861 + }, + { + "epoch": 0.599026632418548, + "grad_norm": 1.127015233039856, + "learning_rate": 1.106090769817656e-05, + "loss": 0.153472900390625, + "step": 8862 + }, + { + "epoch": 0.5990942273894823, + "grad_norm": 0.5392553806304932, + "learning_rate": 1.105772406659025e-05, + "loss": 0.08719253540039062, + "step": 8863 + }, + { + "epoch": 0.5991618223604164, + "grad_norm": 0.7490963339805603, + "learning_rate": 1.1054540625755775e-05, + "loss": 0.1258544921875, + "step": 8864 + }, + { + "epoch": 0.5992294173313506, + "grad_norm": 0.9385262727737427, + "learning_rate": 1.1051357375827153e-05, + "loss": 0.176910400390625, + "step": 8865 + }, + { + "epoch": 0.5992970123022847, + "grad_norm": 0.768513560295105, + "learning_rate": 1.1048174316958427e-05, + "loss": 0.12762451171875, + "step": 8866 + }, + { + "epoch": 0.5993646072732188, + "grad_norm": 0.5855088829994202, + "learning_rate": 1.1044991449303594e-05, + "loss": 0.09771347045898438, + "step": 8867 + }, + { + "epoch": 0.5994322022441531, + "grad_norm": 0.6160845756530762, + "learning_rate": 1.1041808773016676e-05, + "loss": 0.10827159881591797, + "step": 8868 + }, + { + "epoch": 0.5994997972150872, + "grad_norm": 0.29479724168777466, + "learning_rate": 1.103862628825166e-05, + "loss": 0.06960296630859375, + "step": 8869 + }, + { + "epoch": 0.5995673921860214, + "grad_norm": 0.47106996178627014, + "learning_rate": 1.1035443995162545e-05, + "loss": 0.07470321655273438, + "step": 8870 + }, + { + "epoch": 0.5996349871569555, + "grad_norm": 0.4870088994503021, + "learning_rate": 1.1032261893903305e-05, + "loss": 0.0914764404296875, + "step": 8871 + }, + { + "epoch": 0.5997025821278897, + "grad_norm": 0.43652355670928955, + "learning_rate": 1.1029079984627905e-05, + "loss": 0.1004180908203125, + "step": 8872 + }, + { + "epoch": 0.5997701770988239, + "grad_norm": 1.3472177982330322, + "learning_rate": 1.1025898267490316e-05, + "loss": 0.1788482666015625, + "step": 8873 + }, + { + "epoch": 0.599837772069758, + "grad_norm": 0.23054972290992737, + "learning_rate": 1.1022716742644479e-05, + "loss": 0.03448486328125, + "step": 8874 + }, + { + "epoch": 0.5999053670406922, + "grad_norm": 1.2516138553619385, + "learning_rate": 1.1019535410244342e-05, + "loss": 0.190673828125, + "step": 8875 + }, + { + "epoch": 0.5999729620116263, + "grad_norm": 1.275038719177246, + "learning_rate": 1.1016354270443833e-05, + "loss": 0.225067138671875, + "step": 8876 + }, + { + "epoch": 0.6000405569825605, + "grad_norm": 0.8470577001571655, + "learning_rate": 1.1013173323396886e-05, + "loss": 0.193328857421875, + "step": 8877 + }, + { + "epoch": 0.6001081519534947, + "grad_norm": 0.6553493142127991, + "learning_rate": 1.10099925692574e-05, + "loss": 0.1382904052734375, + "step": 8878 + }, + { + "epoch": 0.6001757469244289, + "grad_norm": 0.7975427508354187, + "learning_rate": 1.100681200817929e-05, + "loss": 0.1260986328125, + "step": 8879 + }, + { + "epoch": 0.600243341895363, + "grad_norm": 0.21122673153877258, + "learning_rate": 1.1003631640316446e-05, + "loss": 0.036861419677734375, + "step": 8880 + }, + { + "epoch": 0.6003109368662971, + "grad_norm": 0.5980446934700012, + "learning_rate": 1.1000451465822764e-05, + "loss": 0.0956878662109375, + "step": 8881 + }, + { + "epoch": 0.6003785318372313, + "grad_norm": 0.9331926703453064, + "learning_rate": 1.0997271484852111e-05, + "loss": 0.16497802734375, + "step": 8882 + }, + { + "epoch": 0.6004461268081654, + "grad_norm": 0.9744410514831543, + "learning_rate": 1.099409169755835e-05, + "loss": 0.187103271484375, + "step": 8883 + }, + { + "epoch": 0.6005137217790997, + "grad_norm": 0.7606461048126221, + "learning_rate": 1.0990912104095348e-05, + "loss": 0.1360931396484375, + "step": 8884 + }, + { + "epoch": 0.6005813167500338, + "grad_norm": 0.4836505651473999, + "learning_rate": 1.0987732704616948e-05, + "loss": 0.115081787109375, + "step": 8885 + }, + { + "epoch": 0.600648911720968, + "grad_norm": 0.9688871502876282, + "learning_rate": 1.0984553499276999e-05, + "loss": 0.180084228515625, + "step": 8886 + }, + { + "epoch": 0.6007165066919021, + "grad_norm": 0.46694886684417725, + "learning_rate": 1.0981374488229317e-05, + "loss": 0.098602294921875, + "step": 8887 + }, + { + "epoch": 0.6007841016628362, + "grad_norm": 0.19138115644454956, + "learning_rate": 1.097819567162773e-05, + "loss": 0.02465057373046875, + "step": 8888 + }, + { + "epoch": 0.6008516966337705, + "grad_norm": 0.368527352809906, + "learning_rate": 1.0975017049626048e-05, + "loss": 0.06928253173828125, + "step": 8889 + }, + { + "epoch": 0.6009192916047046, + "grad_norm": 0.9394838809967041, + "learning_rate": 1.0971838622378076e-05, + "loss": 0.2274627685546875, + "step": 8890 + }, + { + "epoch": 0.6009868865756388, + "grad_norm": 0.19444933533668518, + "learning_rate": 1.0968660390037596e-05, + "loss": 0.024372100830078125, + "step": 8891 + }, + { + "epoch": 0.6010544815465729, + "grad_norm": 0.6427584290504456, + "learning_rate": 1.09654823527584e-05, + "loss": 0.12747955322265625, + "step": 8892 + }, + { + "epoch": 0.601122076517507, + "grad_norm": 0.4100477993488312, + "learning_rate": 1.0962304510694264e-05, + "loss": 0.0687103271484375, + "step": 8893 + }, + { + "epoch": 0.6011896714884413, + "grad_norm": 0.25107085704803467, + "learning_rate": 1.0959126863998937e-05, + "loss": 0.042690277099609375, + "step": 8894 + }, + { + "epoch": 0.6012572664593754, + "grad_norm": 0.3891024589538574, + "learning_rate": 1.095594941282619e-05, + "loss": 0.053897857666015625, + "step": 8895 + }, + { + "epoch": 0.6013248614303096, + "grad_norm": 0.5967978239059448, + "learning_rate": 1.0952772157329753e-05, + "loss": 0.09563446044921875, + "step": 8896 + }, + { + "epoch": 0.6013924564012437, + "grad_norm": 0.5891037583351135, + "learning_rate": 1.094959509766337e-05, + "loss": 0.10785675048828125, + "step": 8897 + }, + { + "epoch": 0.601460051372178, + "grad_norm": 0.3704696297645569, + "learning_rate": 1.0946418233980765e-05, + "loss": 0.06946563720703125, + "step": 8898 + }, + { + "epoch": 0.6015276463431121, + "grad_norm": 0.7809589505195618, + "learning_rate": 1.094324156643566e-05, + "loss": 0.16839599609375, + "step": 8899 + }, + { + "epoch": 0.6015952413140462, + "grad_norm": 0.2794687747955322, + "learning_rate": 1.094006509518175e-05, + "loss": 0.05208587646484375, + "step": 8900 + }, + { + "epoch": 0.6016628362849804, + "grad_norm": 0.3328988552093506, + "learning_rate": 1.0936888820372744e-05, + "loss": 0.05306243896484375, + "step": 8901 + }, + { + "epoch": 0.6017304312559145, + "grad_norm": 0.6874134540557861, + "learning_rate": 1.0933712742162318e-05, + "loss": 0.13897705078125, + "step": 8902 + }, + { + "epoch": 0.6017980262268487, + "grad_norm": 0.44487616419792175, + "learning_rate": 1.0930536860704167e-05, + "loss": 0.06610107421875, + "step": 8903 + }, + { + "epoch": 0.6018656211977829, + "grad_norm": 0.63250732421875, + "learning_rate": 1.0927361176151946e-05, + "loss": 0.09236907958984375, + "step": 8904 + }, + { + "epoch": 0.6019332161687171, + "grad_norm": 0.902647078037262, + "learning_rate": 1.0924185688659318e-05, + "loss": 0.1717681884765625, + "step": 8905 + }, + { + "epoch": 0.6020008111396512, + "grad_norm": 0.7829903960227966, + "learning_rate": 1.0921010398379937e-05, + "loss": 0.10725021362304688, + "step": 8906 + }, + { + "epoch": 0.6020684061105853, + "grad_norm": 0.36921820044517517, + "learning_rate": 1.0917835305467435e-05, + "loss": 0.06177520751953125, + "step": 8907 + }, + { + "epoch": 0.6021360010815195, + "grad_norm": 0.6368567943572998, + "learning_rate": 1.0914660410075451e-05, + "loss": 0.08468246459960938, + "step": 8908 + }, + { + "epoch": 0.6022035960524537, + "grad_norm": 0.756866991519928, + "learning_rate": 1.09114857123576e-05, + "loss": 0.182037353515625, + "step": 8909 + }, + { + "epoch": 0.6022711910233879, + "grad_norm": 0.8876903653144836, + "learning_rate": 1.0908311212467502e-05, + "loss": 0.1312885284423828, + "step": 8910 + }, + { + "epoch": 0.602338785994322, + "grad_norm": 0.8464537262916565, + "learning_rate": 1.0905136910558746e-05, + "loss": 0.1535797119140625, + "step": 8911 + }, + { + "epoch": 0.6024063809652562, + "grad_norm": 0.6278752684593201, + "learning_rate": 1.0901962806784941e-05, + "loss": 0.09099578857421875, + "step": 8912 + }, + { + "epoch": 0.6024739759361903, + "grad_norm": 0.8549899458885193, + "learning_rate": 1.0898788901299651e-05, + "loss": 0.117431640625, + "step": 8913 + }, + { + "epoch": 0.6025415709071245, + "grad_norm": 0.7282235026359558, + "learning_rate": 1.0895615194256467e-05, + "loss": 0.151092529296875, + "step": 8914 + }, + { + "epoch": 0.6026091658780587, + "grad_norm": 0.21993590891361237, + "learning_rate": 1.0892441685808936e-05, + "loss": 0.029844284057617188, + "step": 8915 + }, + { + "epoch": 0.6026767608489928, + "grad_norm": 1.380179762840271, + "learning_rate": 1.0889268376110632e-05, + "loss": 0.182769775390625, + "step": 8916 + }, + { + "epoch": 0.602744355819927, + "grad_norm": 0.8940958976745605, + "learning_rate": 1.0886095265315083e-05, + "loss": 0.1314849853515625, + "step": 8917 + }, + { + "epoch": 0.6028119507908611, + "grad_norm": 0.5103932023048401, + "learning_rate": 1.0882922353575825e-05, + "loss": 0.0879364013671875, + "step": 8918 + }, + { + "epoch": 0.6028795457617954, + "grad_norm": 0.9034315943717957, + "learning_rate": 1.0879749641046396e-05, + "loss": 0.12932205200195312, + "step": 8919 + }, + { + "epoch": 0.6029471407327295, + "grad_norm": 0.7459540963172913, + "learning_rate": 1.0876577127880293e-05, + "loss": 0.10089874267578125, + "step": 8920 + }, + { + "epoch": 0.6030147357036636, + "grad_norm": 1.0561147928237915, + "learning_rate": 1.0873404814231038e-05, + "loss": 0.1457672119140625, + "step": 8921 + }, + { + "epoch": 0.6030823306745978, + "grad_norm": 0.7520825266838074, + "learning_rate": 1.087023270025212e-05, + "loss": 0.1331329345703125, + "step": 8922 + }, + { + "epoch": 0.6031499256455319, + "grad_norm": 0.7161359190940857, + "learning_rate": 1.0867060786097028e-05, + "loss": 0.1477508544921875, + "step": 8923 + }, + { + "epoch": 0.6032175206164662, + "grad_norm": 0.32280975580215454, + "learning_rate": 1.0863889071919236e-05, + "loss": 0.066619873046875, + "step": 8924 + }, + { + "epoch": 0.6032851155874003, + "grad_norm": 1.2792633771896362, + "learning_rate": 1.0860717557872214e-05, + "loss": 0.16046142578125, + "step": 8925 + }, + { + "epoch": 0.6033527105583345, + "grad_norm": 0.32914575934410095, + "learning_rate": 1.0857546244109415e-05, + "loss": 0.0569610595703125, + "step": 8926 + }, + { + "epoch": 0.6034203055292686, + "grad_norm": 0.9620280861854553, + "learning_rate": 1.0854375130784297e-05, + "loss": 0.140838623046875, + "step": 8927 + }, + { + "epoch": 0.6034879005002027, + "grad_norm": 0.7699841260910034, + "learning_rate": 1.0851204218050292e-05, + "loss": 0.10177230834960938, + "step": 8928 + }, + { + "epoch": 0.603555495471137, + "grad_norm": 0.6722339391708374, + "learning_rate": 1.084803350606082e-05, + "loss": 0.122314453125, + "step": 8929 + }, + { + "epoch": 0.6036230904420711, + "grad_norm": 0.49711957573890686, + "learning_rate": 1.0844862994969311e-05, + "loss": 0.06927490234375, + "step": 8930 + }, + { + "epoch": 0.6036906854130053, + "grad_norm": 0.40221449732780457, + "learning_rate": 1.0841692684929169e-05, + "loss": 0.06896209716796875, + "step": 8931 + }, + { + "epoch": 0.6037582803839394, + "grad_norm": 0.5576736927032471, + "learning_rate": 1.08385225760938e-05, + "loss": 0.1083831787109375, + "step": 8932 + }, + { + "epoch": 0.6038258753548736, + "grad_norm": 0.6163668036460876, + "learning_rate": 1.0835352668616584e-05, + "loss": 0.0991973876953125, + "step": 8933 + }, + { + "epoch": 0.6038934703258078, + "grad_norm": 1.0698609352111816, + "learning_rate": 1.083218296265091e-05, + "loss": 0.122039794921875, + "step": 8934 + }, + { + "epoch": 0.6039610652967419, + "grad_norm": 0.936654806137085, + "learning_rate": 1.0829013458350137e-05, + "loss": 0.205047607421875, + "step": 8935 + }, + { + "epoch": 0.6040286602676761, + "grad_norm": 0.288264662027359, + "learning_rate": 1.0825844155867641e-05, + "loss": 0.050289154052734375, + "step": 8936 + }, + { + "epoch": 0.6040962552386102, + "grad_norm": 0.705342173576355, + "learning_rate": 1.0822675055356756e-05, + "loss": 0.098419189453125, + "step": 8937 + }, + { + "epoch": 0.6041638502095444, + "grad_norm": 0.687763512134552, + "learning_rate": 1.0819506156970835e-05, + "loss": 0.08805084228515625, + "step": 8938 + }, + { + "epoch": 0.6042314451804786, + "grad_norm": 0.6972993016242981, + "learning_rate": 1.0816337460863198e-05, + "loss": 0.1649322509765625, + "step": 8939 + }, + { + "epoch": 0.6042990401514128, + "grad_norm": 0.7835142016410828, + "learning_rate": 1.081316896718718e-05, + "loss": 0.09368324279785156, + "step": 8940 + }, + { + "epoch": 0.6043666351223469, + "grad_norm": 0.26201921701431274, + "learning_rate": 1.0810000676096088e-05, + "loss": 0.039615631103515625, + "step": 8941 + }, + { + "epoch": 0.604434230093281, + "grad_norm": 0.9590473175048828, + "learning_rate": 1.0806832587743211e-05, + "loss": 0.202911376953125, + "step": 8942 + }, + { + "epoch": 0.6045018250642152, + "grad_norm": 0.4580956995487213, + "learning_rate": 1.0803664702281855e-05, + "loss": 0.08663558959960938, + "step": 8943 + }, + { + "epoch": 0.6045694200351494, + "grad_norm": 1.0245367288589478, + "learning_rate": 1.0800497019865294e-05, + "loss": 0.1488800048828125, + "step": 8944 + }, + { + "epoch": 0.6046370150060836, + "grad_norm": 0.216421440243721, + "learning_rate": 1.079732954064681e-05, + "loss": 0.04180335998535156, + "step": 8945 + }, + { + "epoch": 0.6047046099770177, + "grad_norm": 0.5818500518798828, + "learning_rate": 1.0794162264779652e-05, + "loss": 0.10808181762695312, + "step": 8946 + }, + { + "epoch": 0.6047722049479519, + "grad_norm": 0.3360268175601959, + "learning_rate": 1.0790995192417082e-05, + "loss": 0.07146453857421875, + "step": 8947 + }, + { + "epoch": 0.604839799918886, + "grad_norm": 0.34804436564445496, + "learning_rate": 1.0787828323712338e-05, + "loss": 0.08276748657226562, + "step": 8948 + }, + { + "epoch": 0.6049073948898201, + "grad_norm": 0.30645108222961426, + "learning_rate": 1.0784661658818662e-05, + "loss": 0.03465461730957031, + "step": 8949 + }, + { + "epoch": 0.6049749898607544, + "grad_norm": 0.7750763297080994, + "learning_rate": 1.0781495197889263e-05, + "loss": 0.17822265625, + "step": 8950 + }, + { + "epoch": 0.6050425848316885, + "grad_norm": 0.7163494825363159, + "learning_rate": 1.0778328941077363e-05, + "loss": 0.10387420654296875, + "step": 8951 + }, + { + "epoch": 0.6051101798026227, + "grad_norm": 0.41447141766548157, + "learning_rate": 1.0775162888536168e-05, + "loss": 0.0685882568359375, + "step": 8952 + }, + { + "epoch": 0.6051777747735568, + "grad_norm": 0.41146400570869446, + "learning_rate": 1.077199704041886e-05, + "loss": 0.06015777587890625, + "step": 8953 + }, + { + "epoch": 0.6052453697444911, + "grad_norm": 1.238643765449524, + "learning_rate": 1.0768831396878632e-05, + "loss": 0.1760711669921875, + "step": 8954 + }, + { + "epoch": 0.6053129647154252, + "grad_norm": 0.41904714703559875, + "learning_rate": 1.076566595806865e-05, + "loss": 0.0699615478515625, + "step": 8955 + }, + { + "epoch": 0.6053805596863593, + "grad_norm": 0.5388588905334473, + "learning_rate": 1.076250072414209e-05, + "loss": 0.131011962890625, + "step": 8956 + }, + { + "epoch": 0.6054481546572935, + "grad_norm": 0.6900516748428345, + "learning_rate": 1.0759335695252088e-05, + "loss": 0.13208961486816406, + "step": 8957 + }, + { + "epoch": 0.6055157496282276, + "grad_norm": 0.5291900634765625, + "learning_rate": 1.0756170871551807e-05, + "loss": 0.1037750244140625, + "step": 8958 + }, + { + "epoch": 0.6055833445991619, + "grad_norm": 0.49989044666290283, + "learning_rate": 1.0753006253194363e-05, + "loss": 0.1087799072265625, + "step": 8959 + }, + { + "epoch": 0.605650939570096, + "grad_norm": 1.0216914415359497, + "learning_rate": 1.0749841840332892e-05, + "loss": 0.11053466796875, + "step": 8960 + }, + { + "epoch": 0.6057185345410302, + "grad_norm": 0.2690969705581665, + "learning_rate": 1.0746677633120501e-05, + "loss": 0.043338775634765625, + "step": 8961 + }, + { + "epoch": 0.6057861295119643, + "grad_norm": 0.7839911580085754, + "learning_rate": 1.0743513631710306e-05, + "loss": 0.09820556640625, + "step": 8962 + }, + { + "epoch": 0.6058537244828984, + "grad_norm": 3.9018208980560303, + "learning_rate": 1.0740349836255388e-05, + "loss": 0.19249725341796875, + "step": 8963 + }, + { + "epoch": 0.6059213194538327, + "grad_norm": 1.1016181707382202, + "learning_rate": 1.0737186246908831e-05, + "loss": 0.184326171875, + "step": 8964 + }, + { + "epoch": 0.6059889144247668, + "grad_norm": 0.7160438299179077, + "learning_rate": 1.073402286382372e-05, + "loss": 0.155303955078125, + "step": 8965 + }, + { + "epoch": 0.606056509395701, + "grad_norm": 0.19975286722183228, + "learning_rate": 1.0730859687153105e-05, + "loss": 0.031156539916992188, + "step": 8966 + }, + { + "epoch": 0.6061241043666351, + "grad_norm": 0.8802576065063477, + "learning_rate": 1.0727696717050054e-05, + "loss": 0.14832305908203125, + "step": 8967 + }, + { + "epoch": 0.6061916993375693, + "grad_norm": 0.29213032126426697, + "learning_rate": 1.07245339536676e-05, + "loss": 0.051578521728515625, + "step": 8968 + }, + { + "epoch": 0.6062592943085034, + "grad_norm": 0.174419566988945, + "learning_rate": 1.072137139715879e-05, + "loss": 0.03055572509765625, + "step": 8969 + }, + { + "epoch": 0.6063268892794376, + "grad_norm": 1.5335427522659302, + "learning_rate": 1.0718209047676632e-05, + "loss": 0.249053955078125, + "step": 8970 + }, + { + "epoch": 0.6063944842503718, + "grad_norm": 0.3410348892211914, + "learning_rate": 1.0715046905374152e-05, + "loss": 0.07297515869140625, + "step": 8971 + }, + { + "epoch": 0.6064620792213059, + "grad_norm": 0.30189159512519836, + "learning_rate": 1.0711884970404347e-05, + "loss": 0.06975555419921875, + "step": 8972 + }, + { + "epoch": 0.6065296741922401, + "grad_norm": 0.2083105742931366, + "learning_rate": 1.0708723242920222e-05, + "loss": 0.030191421508789062, + "step": 8973 + }, + { + "epoch": 0.6065972691631742, + "grad_norm": 1.0462498664855957, + "learning_rate": 1.0705561723074749e-05, + "loss": 0.204925537109375, + "step": 8974 + }, + { + "epoch": 0.6066648641341085, + "grad_norm": 0.8850278258323669, + "learning_rate": 1.070240041102091e-05, + "loss": 0.136627197265625, + "step": 8975 + }, + { + "epoch": 0.6067324591050426, + "grad_norm": 1.0519336462020874, + "learning_rate": 1.0699239306911664e-05, + "loss": 0.16162109375, + "step": 8976 + }, + { + "epoch": 0.6068000540759767, + "grad_norm": 0.9749851226806641, + "learning_rate": 1.0696078410899964e-05, + "loss": 0.13297271728515625, + "step": 8977 + }, + { + "epoch": 0.6068676490469109, + "grad_norm": 0.7272592186927795, + "learning_rate": 1.0692917723138762e-05, + "loss": 0.146636962890625, + "step": 8978 + }, + { + "epoch": 0.606935244017845, + "grad_norm": 0.37036705017089844, + "learning_rate": 1.068975724378098e-05, + "loss": 0.064544677734375, + "step": 8979 + }, + { + "epoch": 0.6070028389887793, + "grad_norm": 0.5481855869293213, + "learning_rate": 1.068659697297955e-05, + "loss": 0.098175048828125, + "step": 8980 + }, + { + "epoch": 0.6070704339597134, + "grad_norm": 0.8686921000480652, + "learning_rate": 1.0683436910887378e-05, + "loss": 0.186370849609375, + "step": 8981 + }, + { + "epoch": 0.6071380289306476, + "grad_norm": 1.2473478317260742, + "learning_rate": 1.068027705765738e-05, + "loss": 0.196807861328125, + "step": 8982 + }, + { + "epoch": 0.6072056239015817, + "grad_norm": 0.5708610415458679, + "learning_rate": 1.067711741344244e-05, + "loss": 0.1205291748046875, + "step": 8983 + }, + { + "epoch": 0.6072732188725158, + "grad_norm": 0.6636106967926025, + "learning_rate": 1.067395797839544e-05, + "loss": 0.115875244140625, + "step": 8984 + }, + { + "epoch": 0.6073408138434501, + "grad_norm": 0.5836614966392517, + "learning_rate": 1.0670798752669258e-05, + "loss": 0.10653305053710938, + "step": 8985 + }, + { + "epoch": 0.6074084088143842, + "grad_norm": 2.1996970176696777, + "learning_rate": 1.0667639736416759e-05, + "loss": 0.264190673828125, + "step": 8986 + }, + { + "epoch": 0.6074760037853184, + "grad_norm": 0.49595192074775696, + "learning_rate": 1.0664480929790793e-05, + "loss": 0.10271453857421875, + "step": 8987 + }, + { + "epoch": 0.6075435987562525, + "grad_norm": 0.4487035274505615, + "learning_rate": 1.0661322332944197e-05, + "loss": 0.08367156982421875, + "step": 8988 + }, + { + "epoch": 0.6076111937271867, + "grad_norm": 0.3795643746852875, + "learning_rate": 1.065816394602981e-05, + "loss": 0.0727996826171875, + "step": 8989 + }, + { + "epoch": 0.6076787886981209, + "grad_norm": 0.5423368215560913, + "learning_rate": 1.065500576920045e-05, + "loss": 0.11856842041015625, + "step": 8990 + }, + { + "epoch": 0.607746383669055, + "grad_norm": 0.1772572249174118, + "learning_rate": 1.065184780260894e-05, + "loss": 0.03580284118652344, + "step": 8991 + }, + { + "epoch": 0.6078139786399892, + "grad_norm": 1.5853924751281738, + "learning_rate": 1.0648690046408067e-05, + "loss": 0.2718505859375, + "step": 8992 + }, + { + "epoch": 0.6078815736109233, + "grad_norm": 0.8970293998718262, + "learning_rate": 1.0645532500750631e-05, + "loss": 0.15085601806640625, + "step": 8993 + }, + { + "epoch": 0.6079491685818575, + "grad_norm": 0.8370931148529053, + "learning_rate": 1.0642375165789411e-05, + "loss": 0.09247589111328125, + "step": 8994 + }, + { + "epoch": 0.6080167635527917, + "grad_norm": 0.3502548336982727, + "learning_rate": 1.0639218041677188e-05, + "loss": 0.0904083251953125, + "step": 8995 + }, + { + "epoch": 0.6080843585237259, + "grad_norm": 0.6048226356506348, + "learning_rate": 1.063606112856671e-05, + "loss": 0.099334716796875, + "step": 8996 + }, + { + "epoch": 0.60815195349466, + "grad_norm": 0.6327472925186157, + "learning_rate": 1.0632904426610735e-05, + "loss": 0.1144256591796875, + "step": 8997 + }, + { + "epoch": 0.6082195484655941, + "grad_norm": 0.340125173330307, + "learning_rate": 1.0629747935961997e-05, + "loss": 0.07202529907226562, + "step": 8998 + }, + { + "epoch": 0.6082871434365283, + "grad_norm": 0.4970391094684601, + "learning_rate": 1.0626591656773241e-05, + "loss": 0.08341598510742188, + "step": 8999 + }, + { + "epoch": 0.6083547384074625, + "grad_norm": 0.7008248567581177, + "learning_rate": 1.0623435589197177e-05, + "loss": 0.11106681823730469, + "step": 9000 + }, + { + "epoch": 0.6084223333783967, + "grad_norm": 0.3534438908100128, + "learning_rate": 1.0620279733386511e-05, + "loss": 0.06522369384765625, + "step": 9001 + }, + { + "epoch": 0.6084899283493308, + "grad_norm": 0.6022732853889465, + "learning_rate": 1.0617124089493957e-05, + "loss": 0.12945556640625, + "step": 9002 + }, + { + "epoch": 0.608557523320265, + "grad_norm": 0.5909791588783264, + "learning_rate": 1.0613968657672187e-05, + "loss": 0.11328125, + "step": 9003 + }, + { + "epoch": 0.6086251182911991, + "grad_norm": 0.9694198369979858, + "learning_rate": 1.06108134380739e-05, + "loss": 0.12322235107421875, + "step": 9004 + }, + { + "epoch": 0.6086927132621333, + "grad_norm": 0.5977190136909485, + "learning_rate": 1.0607658430851745e-05, + "loss": 0.1402130126953125, + "step": 9005 + }, + { + "epoch": 0.6087603082330675, + "grad_norm": 0.2141445279121399, + "learning_rate": 1.0604503636158396e-05, + "loss": 0.03854179382324219, + "step": 9006 + }, + { + "epoch": 0.6088279032040016, + "grad_norm": 0.7571966052055359, + "learning_rate": 1.0601349054146492e-05, + "loss": 0.172454833984375, + "step": 9007 + }, + { + "epoch": 0.6088954981749358, + "grad_norm": 1.1226798295974731, + "learning_rate": 1.0598194684968683e-05, + "loss": 0.14537429809570312, + "step": 9008 + }, + { + "epoch": 0.6089630931458699, + "grad_norm": 0.7719661593437195, + "learning_rate": 1.0595040528777584e-05, + "loss": 0.1320362091064453, + "step": 9009 + }, + { + "epoch": 0.6090306881168042, + "grad_norm": 0.2555690109729767, + "learning_rate": 1.059188658572582e-05, + "loss": 0.0418243408203125, + "step": 9010 + }, + { + "epoch": 0.6090982830877383, + "grad_norm": 0.9165169596672058, + "learning_rate": 1.0588732855966002e-05, + "loss": 0.13387298583984375, + "step": 9011 + }, + { + "epoch": 0.6091658780586724, + "grad_norm": 0.9678157567977905, + "learning_rate": 1.0585579339650713e-05, + "loss": 0.16027069091796875, + "step": 9012 + }, + { + "epoch": 0.6092334730296066, + "grad_norm": 0.3137754201889038, + "learning_rate": 1.0582426036932552e-05, + "loss": 0.050750732421875, + "step": 9013 + }, + { + "epoch": 0.6093010680005407, + "grad_norm": 0.4588235020637512, + "learning_rate": 1.0579272947964091e-05, + "loss": 0.09232711791992188, + "step": 9014 + }, + { + "epoch": 0.609368662971475, + "grad_norm": 0.3933546841144562, + "learning_rate": 1.0576120072897902e-05, + "loss": 0.07894134521484375, + "step": 9015 + }, + { + "epoch": 0.6094362579424091, + "grad_norm": 0.8222509026527405, + "learning_rate": 1.0572967411886528e-05, + "loss": 0.131439208984375, + "step": 9016 + }, + { + "epoch": 0.6095038529133433, + "grad_norm": 0.42073020339012146, + "learning_rate": 1.0569814965082526e-05, + "loss": 0.0543365478515625, + "step": 9017 + }, + { + "epoch": 0.6095714478842774, + "grad_norm": 0.7434269785881042, + "learning_rate": 1.0566662732638425e-05, + "loss": 0.165313720703125, + "step": 9018 + }, + { + "epoch": 0.6096390428552115, + "grad_norm": 0.6971110701560974, + "learning_rate": 1.0563510714706757e-05, + "loss": 0.12282180786132812, + "step": 9019 + }, + { + "epoch": 0.6097066378261458, + "grad_norm": 0.7593880891799927, + "learning_rate": 1.0560358911440025e-05, + "loss": 0.12361907958984375, + "step": 9020 + }, + { + "epoch": 0.6097742327970799, + "grad_norm": 0.9397629499435425, + "learning_rate": 1.0557207322990744e-05, + "loss": 0.17401123046875, + "step": 9021 + }, + { + "epoch": 0.6098418277680141, + "grad_norm": 0.6663335561752319, + "learning_rate": 1.0554055949511401e-05, + "loss": 0.10681915283203125, + "step": 9022 + }, + { + "epoch": 0.6099094227389482, + "grad_norm": 1.039415955543518, + "learning_rate": 1.0550904791154477e-05, + "loss": 0.13478851318359375, + "step": 9023 + }, + { + "epoch": 0.6099770177098823, + "grad_norm": 0.7123866677284241, + "learning_rate": 1.0547753848072452e-05, + "loss": 0.10508346557617188, + "step": 9024 + }, + { + "epoch": 0.6100446126808166, + "grad_norm": 0.6131675243377686, + "learning_rate": 1.0544603120417777e-05, + "loss": 0.108917236328125, + "step": 9025 + }, + { + "epoch": 0.6101122076517507, + "grad_norm": 0.7063643336296082, + "learning_rate": 1.0541452608342916e-05, + "loss": 0.152008056640625, + "step": 9026 + }, + { + "epoch": 0.6101798026226849, + "grad_norm": 0.9971559643745422, + "learning_rate": 1.0538302312000303e-05, + "loss": 0.18060302734375, + "step": 9027 + }, + { + "epoch": 0.610247397593619, + "grad_norm": 1.0286626815795898, + "learning_rate": 1.0535152231542374e-05, + "loss": 0.1570587158203125, + "step": 9028 + }, + { + "epoch": 0.6103149925645532, + "grad_norm": 1.0714913606643677, + "learning_rate": 1.0532002367121544e-05, + "loss": 0.1533355712890625, + "step": 9029 + }, + { + "epoch": 0.6103825875354874, + "grad_norm": 1.3208982944488525, + "learning_rate": 1.0528852718890229e-05, + "loss": 0.22314453125, + "step": 9030 + }, + { + "epoch": 0.6104501825064215, + "grad_norm": 0.6240246295928955, + "learning_rate": 1.052570328700082e-05, + "loss": 0.0794525146484375, + "step": 9031 + }, + { + "epoch": 0.6105177774773557, + "grad_norm": 0.8465886116027832, + "learning_rate": 1.0522554071605716e-05, + "loss": 0.181121826171875, + "step": 9032 + }, + { + "epoch": 0.6105853724482898, + "grad_norm": 0.2867729067802429, + "learning_rate": 1.0519405072857288e-05, + "loss": 0.04620170593261719, + "step": 9033 + }, + { + "epoch": 0.610652967419224, + "grad_norm": 0.6397718191146851, + "learning_rate": 1.051625629090791e-05, + "loss": 0.12128448486328125, + "step": 9034 + }, + { + "epoch": 0.6107205623901582, + "grad_norm": 0.7326084971427917, + "learning_rate": 1.0513107725909938e-05, + "loss": 0.09585189819335938, + "step": 9035 + }, + { + "epoch": 0.6107881573610924, + "grad_norm": 1.4313981533050537, + "learning_rate": 1.0509959378015712e-05, + "loss": 0.137786865234375, + "step": 9036 + }, + { + "epoch": 0.6108557523320265, + "grad_norm": 0.2235555648803711, + "learning_rate": 1.050681124737758e-05, + "loss": 0.0370330810546875, + "step": 9037 + }, + { + "epoch": 0.6109233473029606, + "grad_norm": 0.6850353479385376, + "learning_rate": 1.050366333414786e-05, + "loss": 0.1328277587890625, + "step": 9038 + }, + { + "epoch": 0.6109909422738948, + "grad_norm": 0.9295252561569214, + "learning_rate": 1.050051563847887e-05, + "loss": 0.151702880859375, + "step": 9039 + }, + { + "epoch": 0.611058537244829, + "grad_norm": 1.0423345565795898, + "learning_rate": 1.0497368160522913e-05, + "loss": 0.157562255859375, + "step": 9040 + }, + { + "epoch": 0.6111261322157632, + "grad_norm": 0.7436097264289856, + "learning_rate": 1.0494220900432293e-05, + "loss": 0.10150146484375, + "step": 9041 + }, + { + "epoch": 0.6111937271866973, + "grad_norm": 0.865192174911499, + "learning_rate": 1.0491073858359278e-05, + "loss": 0.113189697265625, + "step": 9042 + }, + { + "epoch": 0.6112613221576315, + "grad_norm": 0.6827943325042725, + "learning_rate": 1.0487927034456157e-05, + "loss": 0.0850372314453125, + "step": 9043 + }, + { + "epoch": 0.6113289171285656, + "grad_norm": 0.9512733817100525, + "learning_rate": 1.0484780428875182e-05, + "loss": 0.114288330078125, + "step": 9044 + }, + { + "epoch": 0.6113965120994997, + "grad_norm": 0.32173576951026917, + "learning_rate": 1.0481634041768617e-05, + "loss": 0.0628814697265625, + "step": 9045 + }, + { + "epoch": 0.611464107070434, + "grad_norm": 0.9790663719177246, + "learning_rate": 1.0478487873288694e-05, + "loss": 0.12026596069335938, + "step": 9046 + }, + { + "epoch": 0.6115317020413681, + "grad_norm": 0.8457990884780884, + "learning_rate": 1.047534192358764e-05, + "loss": 0.13368988037109375, + "step": 9047 + }, + { + "epoch": 0.6115992970123023, + "grad_norm": 0.5186946988105774, + "learning_rate": 1.0472196192817691e-05, + "loss": 0.12260818481445312, + "step": 9048 + }, + { + "epoch": 0.6116668919832364, + "grad_norm": 0.5069012641906738, + "learning_rate": 1.0469050681131042e-05, + "loss": 0.09975433349609375, + "step": 9049 + }, + { + "epoch": 0.6117344869541707, + "grad_norm": 1.1326515674591064, + "learning_rate": 1.0465905388679905e-05, + "loss": 0.1641845703125, + "step": 9050 + }, + { + "epoch": 0.6118020819251048, + "grad_norm": 0.5034628510475159, + "learning_rate": 1.0462760315616455e-05, + "loss": 0.09161376953125, + "step": 9051 + }, + { + "epoch": 0.6118696768960389, + "grad_norm": 0.8376498222351074, + "learning_rate": 1.0459615462092882e-05, + "loss": 0.221923828125, + "step": 9052 + }, + { + "epoch": 0.6119372718669731, + "grad_norm": 0.6001039743423462, + "learning_rate": 1.0456470828261347e-05, + "loss": 0.1143798828125, + "step": 9053 + }, + { + "epoch": 0.6120048668379072, + "grad_norm": 0.7008835077285767, + "learning_rate": 1.045332641427401e-05, + "loss": 0.1198577880859375, + "step": 9054 + }, + { + "epoch": 0.6120724618088415, + "grad_norm": 0.752806544303894, + "learning_rate": 1.0450182220283018e-05, + "loss": 0.12874984741210938, + "step": 9055 + }, + { + "epoch": 0.6121400567797756, + "grad_norm": 0.8921047449111938, + "learning_rate": 1.0447038246440503e-05, + "loss": 0.171356201171875, + "step": 9056 + }, + { + "epoch": 0.6122076517507098, + "grad_norm": 0.6351968050003052, + "learning_rate": 1.0443894492898598e-05, + "loss": 0.095550537109375, + "step": 9057 + }, + { + "epoch": 0.6122752467216439, + "grad_norm": 0.5007563829421997, + "learning_rate": 1.0440750959809404e-05, + "loss": 0.05968475341796875, + "step": 9058 + }, + { + "epoch": 0.612342841692578, + "grad_norm": 0.625873327255249, + "learning_rate": 1.0437607647325037e-05, + "loss": 0.137451171875, + "step": 9059 + }, + { + "epoch": 0.6124104366635122, + "grad_norm": 0.7329569458961487, + "learning_rate": 1.043446455559758e-05, + "loss": 0.10188865661621094, + "step": 9060 + }, + { + "epoch": 0.6124780316344464, + "grad_norm": 0.58722323179245, + "learning_rate": 1.0431321684779128e-05, + "loss": 0.107391357421875, + "step": 9061 + }, + { + "epoch": 0.6125456266053806, + "grad_norm": 0.47821542620658875, + "learning_rate": 1.0428179035021738e-05, + "loss": 0.08433914184570312, + "step": 9062 + }, + { + "epoch": 0.6126132215763147, + "grad_norm": 0.6931520104408264, + "learning_rate": 1.0425036606477484e-05, + "loss": 0.139556884765625, + "step": 9063 + }, + { + "epoch": 0.6126808165472489, + "grad_norm": 0.27519893646240234, + "learning_rate": 1.0421894399298405e-05, + "loss": 0.04198455810546875, + "step": 9064 + }, + { + "epoch": 0.612748411518183, + "grad_norm": 0.43012118339538574, + "learning_rate": 1.041875241363655e-05, + "loss": 0.07353591918945312, + "step": 9065 + }, + { + "epoch": 0.6128160064891172, + "grad_norm": 0.3091872036457062, + "learning_rate": 1.0415610649643943e-05, + "loss": 0.05059814453125, + "step": 9066 + }, + { + "epoch": 0.6128836014600514, + "grad_norm": 1.1052031517028809, + "learning_rate": 1.0412469107472605e-05, + "loss": 0.17999267578125, + "step": 9067 + }, + { + "epoch": 0.6129511964309855, + "grad_norm": 0.3258644938468933, + "learning_rate": 1.0409327787274537e-05, + "loss": 0.04219818115234375, + "step": 9068 + }, + { + "epoch": 0.6130187914019197, + "grad_norm": 0.5124245285987854, + "learning_rate": 1.0406186689201741e-05, + "loss": 0.0855560302734375, + "step": 9069 + }, + { + "epoch": 0.6130863863728538, + "grad_norm": 0.8681009411811829, + "learning_rate": 1.0403045813406205e-05, + "loss": 0.1205291748046875, + "step": 9070 + }, + { + "epoch": 0.6131539813437881, + "grad_norm": 0.6023353934288025, + "learning_rate": 1.0399905160039895e-05, + "loss": 0.11276435852050781, + "step": 9071 + }, + { + "epoch": 0.6132215763147222, + "grad_norm": 0.5827052593231201, + "learning_rate": 1.0396764729254783e-05, + "loss": 0.123138427734375, + "step": 9072 + }, + { + "epoch": 0.6132891712856563, + "grad_norm": 0.39493197202682495, + "learning_rate": 1.0393624521202819e-05, + "loss": 0.0679168701171875, + "step": 9073 + }, + { + "epoch": 0.6133567662565905, + "grad_norm": 1.123207926750183, + "learning_rate": 1.0390484536035952e-05, + "loss": 0.12995147705078125, + "step": 9074 + }, + { + "epoch": 0.6134243612275246, + "grad_norm": 1.100899577140808, + "learning_rate": 1.0387344773906104e-05, + "loss": 0.1276397705078125, + "step": 9075 + }, + { + "epoch": 0.6134919561984589, + "grad_norm": 0.6759874224662781, + "learning_rate": 1.0384205234965205e-05, + "loss": 0.115264892578125, + "step": 9076 + }, + { + "epoch": 0.613559551169393, + "grad_norm": 0.39691025018692017, + "learning_rate": 1.0381065919365159e-05, + "loss": 0.054718017578125, + "step": 9077 + }, + { + "epoch": 0.6136271461403272, + "grad_norm": 0.21654967963695526, + "learning_rate": 1.0377926827257876e-05, + "loss": 0.04367828369140625, + "step": 9078 + }, + { + "epoch": 0.6136947411112613, + "grad_norm": 0.37520045042037964, + "learning_rate": 1.037478795879523e-05, + "loss": 0.07342529296875, + "step": 9079 + }, + { + "epoch": 0.6137623360821954, + "grad_norm": 0.4151078164577484, + "learning_rate": 1.0371649314129112e-05, + "loss": 0.0718536376953125, + "step": 9080 + }, + { + "epoch": 0.6138299310531297, + "grad_norm": 0.3532516658306122, + "learning_rate": 1.0368510893411382e-05, + "loss": 0.05017852783203125, + "step": 9081 + }, + { + "epoch": 0.6138975260240638, + "grad_norm": 1.1008853912353516, + "learning_rate": 1.0365372696793894e-05, + "loss": 0.15049362182617188, + "step": 9082 + }, + { + "epoch": 0.613965120994998, + "grad_norm": 0.4996678829193115, + "learning_rate": 1.0362234724428504e-05, + "loss": 0.084808349609375, + "step": 9083 + }, + { + "epoch": 0.6140327159659321, + "grad_norm": 1.234537959098816, + "learning_rate": 1.0359096976467033e-05, + "loss": 0.1880340576171875, + "step": 9084 + }, + { + "epoch": 0.6141003109368663, + "grad_norm": 0.6137944459915161, + "learning_rate": 1.0355959453061315e-05, + "loss": 0.1143798828125, + "step": 9085 + }, + { + "epoch": 0.6141679059078005, + "grad_norm": 0.7358008027076721, + "learning_rate": 1.0352822154363158e-05, + "loss": 0.14883041381835938, + "step": 9086 + }, + { + "epoch": 0.6142355008787346, + "grad_norm": 1.3058034181594849, + "learning_rate": 1.0349685080524369e-05, + "loss": 0.203369140625, + "step": 9087 + }, + { + "epoch": 0.6143030958496688, + "grad_norm": 0.23538514971733093, + "learning_rate": 1.0346548231696733e-05, + "loss": 0.04836273193359375, + "step": 9088 + }, + { + "epoch": 0.6143706908206029, + "grad_norm": 0.7682739496231079, + "learning_rate": 1.0343411608032034e-05, + "loss": 0.1397247314453125, + "step": 9089 + }, + { + "epoch": 0.6144382857915371, + "grad_norm": 0.7948892116546631, + "learning_rate": 1.0340275209682037e-05, + "loss": 0.115875244140625, + "step": 9090 + }, + { + "epoch": 0.6145058807624713, + "grad_norm": 0.6030557751655579, + "learning_rate": 1.0337139036798513e-05, + "loss": 0.138153076171875, + "step": 9091 + }, + { + "epoch": 0.6145734757334055, + "grad_norm": 0.44176608324050903, + "learning_rate": 1.0334003089533193e-05, + "loss": 0.1098175048828125, + "step": 9092 + }, + { + "epoch": 0.6146410707043396, + "grad_norm": 0.6267017722129822, + "learning_rate": 1.033086736803782e-05, + "loss": 0.1199951171875, + "step": 9093 + }, + { + "epoch": 0.6147086656752737, + "grad_norm": 0.2662566900253296, + "learning_rate": 1.0327731872464128e-05, + "loss": 0.038639068603515625, + "step": 9094 + }, + { + "epoch": 0.6147762606462079, + "grad_norm": 0.7101941704750061, + "learning_rate": 1.0324596602963817e-05, + "loss": 0.0880889892578125, + "step": 9095 + }, + { + "epoch": 0.614843855617142, + "grad_norm": 0.5756741166114807, + "learning_rate": 1.0321461559688605e-05, + "loss": 0.1163177490234375, + "step": 9096 + }, + { + "epoch": 0.6149114505880763, + "grad_norm": 0.7321146726608276, + "learning_rate": 1.0318326742790171e-05, + "loss": 0.1395721435546875, + "step": 9097 + }, + { + "epoch": 0.6149790455590104, + "grad_norm": 0.779909610748291, + "learning_rate": 1.0315192152420208e-05, + "loss": 0.1458892822265625, + "step": 9098 + }, + { + "epoch": 0.6150466405299446, + "grad_norm": 0.20508144795894623, + "learning_rate": 1.0312057788730382e-05, + "loss": 0.022319793701171875, + "step": 9099 + }, + { + "epoch": 0.6151142355008787, + "grad_norm": 0.31412407755851746, + "learning_rate": 1.0308923651872357e-05, + "loss": 0.064910888671875, + "step": 9100 + }, + { + "epoch": 0.6151818304718129, + "grad_norm": 0.555168092250824, + "learning_rate": 1.0305789741997776e-05, + "loss": 0.11309814453125, + "step": 9101 + }, + { + "epoch": 0.6152494254427471, + "grad_norm": 0.7662168145179749, + "learning_rate": 1.0302656059258281e-05, + "loss": 0.12469482421875, + "step": 9102 + }, + { + "epoch": 0.6153170204136812, + "grad_norm": 0.39046353101730347, + "learning_rate": 1.0299522603805495e-05, + "loss": 0.081451416015625, + "step": 9103 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 1.433358073234558, + "learning_rate": 1.0296389375791048e-05, + "loss": 0.239776611328125, + "step": 9104 + }, + { + "epoch": 0.6154522103555495, + "grad_norm": 0.22495847940444946, + "learning_rate": 1.0293256375366526e-05, + "loss": 0.029600143432617188, + "step": 9105 + }, + { + "epoch": 0.6155198053264838, + "grad_norm": 0.5863970518112183, + "learning_rate": 1.0290123602683532e-05, + "loss": 0.09595489501953125, + "step": 9106 + }, + { + "epoch": 0.6155874002974179, + "grad_norm": 0.3621338903903961, + "learning_rate": 1.0286991057893652e-05, + "loss": 0.056713104248046875, + "step": 9107 + }, + { + "epoch": 0.615654995268352, + "grad_norm": 0.1745012104511261, + "learning_rate": 1.028385874114845e-05, + "loss": 0.030193328857421875, + "step": 9108 + }, + { + "epoch": 0.6157225902392862, + "grad_norm": 0.3044671416282654, + "learning_rate": 1.0280726652599496e-05, + "loss": 0.06719970703125, + "step": 9109 + }, + { + "epoch": 0.6157901852102203, + "grad_norm": 0.40509501099586487, + "learning_rate": 1.0277594792398328e-05, + "loss": 0.07532501220703125, + "step": 9110 + }, + { + "epoch": 0.6158577801811546, + "grad_norm": 0.7163841724395752, + "learning_rate": 1.0274463160696504e-05, + "loss": 0.08196258544921875, + "step": 9111 + }, + { + "epoch": 0.6159253751520887, + "grad_norm": 0.3332405388355255, + "learning_rate": 1.027133175764553e-05, + "loss": 0.05681133270263672, + "step": 9112 + }, + { + "epoch": 0.6159929701230229, + "grad_norm": 0.26018890738487244, + "learning_rate": 1.0268200583396939e-05, + "loss": 0.0453643798828125, + "step": 9113 + }, + { + "epoch": 0.616060565093957, + "grad_norm": 0.36170125007629395, + "learning_rate": 1.0265069638102225e-05, + "loss": 0.0397491455078125, + "step": 9114 + }, + { + "epoch": 0.6161281600648911, + "grad_norm": 0.36233070492744446, + "learning_rate": 1.026193892191289e-05, + "loss": 0.06021881103515625, + "step": 9115 + }, + { + "epoch": 0.6161957550358254, + "grad_norm": 0.6880051493644714, + "learning_rate": 1.0258808434980422e-05, + "loss": 0.156890869140625, + "step": 9116 + }, + { + "epoch": 0.6162633500067595, + "grad_norm": 0.5605556964874268, + "learning_rate": 1.025567817745628e-05, + "loss": 0.11092376708984375, + "step": 9117 + }, + { + "epoch": 0.6163309449776937, + "grad_norm": 0.5914947390556335, + "learning_rate": 1.0252548149491934e-05, + "loss": 0.10406494140625, + "step": 9118 + }, + { + "epoch": 0.6163985399486278, + "grad_norm": 0.7153418064117432, + "learning_rate": 1.0249418351238828e-05, + "loss": 0.1226806640625, + "step": 9119 + }, + { + "epoch": 0.616466134919562, + "grad_norm": 0.699324905872345, + "learning_rate": 1.0246288782848415e-05, + "loss": 0.13946533203125, + "step": 9120 + }, + { + "epoch": 0.6165337298904962, + "grad_norm": 0.40271174907684326, + "learning_rate": 1.0243159444472106e-05, + "loss": 0.0843658447265625, + "step": 9121 + }, + { + "epoch": 0.6166013248614303, + "grad_norm": 1.4365196228027344, + "learning_rate": 1.0240030336261332e-05, + "loss": 0.17138671875, + "step": 9122 + }, + { + "epoch": 0.6166689198323645, + "grad_norm": 0.9550191760063171, + "learning_rate": 1.0236901458367485e-05, + "loss": 0.13413429260253906, + "step": 9123 + }, + { + "epoch": 0.6167365148032986, + "grad_norm": 1.3984043598175049, + "learning_rate": 1.0233772810941975e-05, + "loss": 0.282073974609375, + "step": 9124 + }, + { + "epoch": 0.6168041097742328, + "grad_norm": 0.49435943365097046, + "learning_rate": 1.023064439413617e-05, + "loss": 0.08137130737304688, + "step": 9125 + }, + { + "epoch": 0.616871704745167, + "grad_norm": 0.4652899205684662, + "learning_rate": 1.0227516208101455e-05, + "loss": 0.07019805908203125, + "step": 9126 + }, + { + "epoch": 0.6169392997161012, + "grad_norm": 0.3366926908493042, + "learning_rate": 1.0224388252989185e-05, + "loss": 0.055118560791015625, + "step": 9127 + }, + { + "epoch": 0.6170068946870353, + "grad_norm": 1.1491326093673706, + "learning_rate": 1.0221260528950705e-05, + "loss": 0.12440109252929688, + "step": 9128 + }, + { + "epoch": 0.6170744896579694, + "grad_norm": 0.234394371509552, + "learning_rate": 1.0218133036137366e-05, + "loss": 0.05121612548828125, + "step": 9129 + }, + { + "epoch": 0.6171420846289036, + "grad_norm": 0.37741154432296753, + "learning_rate": 1.0215005774700484e-05, + "loss": 0.042858123779296875, + "step": 9130 + }, + { + "epoch": 0.6172096795998377, + "grad_norm": 0.42091691493988037, + "learning_rate": 1.0211878744791382e-05, + "loss": 0.103240966796875, + "step": 9131 + }, + { + "epoch": 0.617277274570772, + "grad_norm": 0.7290806174278259, + "learning_rate": 1.020875194656136e-05, + "loss": 0.1238555908203125, + "step": 9132 + }, + { + "epoch": 0.6173448695417061, + "grad_norm": 0.3989068865776062, + "learning_rate": 1.0205625380161723e-05, + "loss": 0.0793609619140625, + "step": 9133 + }, + { + "epoch": 0.6174124645126403, + "grad_norm": 0.4092516601085663, + "learning_rate": 1.0202499045743738e-05, + "loss": 0.0731353759765625, + "step": 9134 + }, + { + "epoch": 0.6174800594835744, + "grad_norm": 0.9942481517791748, + "learning_rate": 1.019937294345869e-05, + "loss": 0.1179962158203125, + "step": 9135 + }, + { + "epoch": 0.6175476544545085, + "grad_norm": 0.6821109056472778, + "learning_rate": 1.0196247073457829e-05, + "loss": 0.132568359375, + "step": 9136 + }, + { + "epoch": 0.6176152494254428, + "grad_norm": 0.2501538395881653, + "learning_rate": 1.0193121435892416e-05, + "loss": 0.04619026184082031, + "step": 9137 + }, + { + "epoch": 0.6176828443963769, + "grad_norm": 0.7558057308197021, + "learning_rate": 1.0189996030913673e-05, + "loss": 0.10033035278320312, + "step": 9138 + }, + { + "epoch": 0.6177504393673111, + "grad_norm": 0.8560110926628113, + "learning_rate": 1.018687085867284e-05, + "loss": 0.20037841796875, + "step": 9139 + }, + { + "epoch": 0.6178180343382452, + "grad_norm": 1.2415051460266113, + "learning_rate": 1.0183745919321131e-05, + "loss": 0.24737548828125, + "step": 9140 + }, + { + "epoch": 0.6178856293091795, + "grad_norm": 0.32875168323516846, + "learning_rate": 1.018062121300974e-05, + "loss": 0.0644378662109375, + "step": 9141 + }, + { + "epoch": 0.6179532242801136, + "grad_norm": 0.4486081898212433, + "learning_rate": 1.017749673988987e-05, + "loss": 0.0873870849609375, + "step": 9142 + }, + { + "epoch": 0.6180208192510477, + "grad_norm": 0.6220736503601074, + "learning_rate": 1.0174372500112694e-05, + "loss": 0.12957000732421875, + "step": 9143 + }, + { + "epoch": 0.6180884142219819, + "grad_norm": 0.9283583164215088, + "learning_rate": 1.0171248493829392e-05, + "loss": 0.19439697265625, + "step": 9144 + }, + { + "epoch": 0.618156009192916, + "grad_norm": 1.0584583282470703, + "learning_rate": 1.0168124721191111e-05, + "loss": 0.183624267578125, + "step": 9145 + }, + { + "epoch": 0.6182236041638502, + "grad_norm": 0.7572372555732727, + "learning_rate": 1.0165001182349013e-05, + "loss": 0.1587982177734375, + "step": 9146 + }, + { + "epoch": 0.6182911991347844, + "grad_norm": 0.5777972340583801, + "learning_rate": 1.016187787745422e-05, + "loss": 0.104766845703125, + "step": 9147 + }, + { + "epoch": 0.6183587941057186, + "grad_norm": 0.2654128968715668, + "learning_rate": 1.0158754806657867e-05, + "loss": 0.041843414306640625, + "step": 9148 + }, + { + "epoch": 0.6184263890766527, + "grad_norm": 0.5381895899772644, + "learning_rate": 1.015563197011106e-05, + "loss": 0.0983428955078125, + "step": 9149 + }, + { + "epoch": 0.6184939840475868, + "grad_norm": 0.4311756491661072, + "learning_rate": 1.015250936796491e-05, + "loss": 0.08454513549804688, + "step": 9150 + }, + { + "epoch": 0.618561579018521, + "grad_norm": 0.21397531032562256, + "learning_rate": 1.0149387000370502e-05, + "loss": 0.0398406982421875, + "step": 9151 + }, + { + "epoch": 0.6186291739894552, + "grad_norm": 1.1898622512817383, + "learning_rate": 1.0146264867478911e-05, + "loss": 0.1554412841796875, + "step": 9152 + }, + { + "epoch": 0.6186967689603894, + "grad_norm": 0.3975217938423157, + "learning_rate": 1.014314296944122e-05, + "loss": 0.0530548095703125, + "step": 9153 + }, + { + "epoch": 0.6187643639313235, + "grad_norm": 0.6412119269371033, + "learning_rate": 1.0140021306408468e-05, + "loss": 0.09716415405273438, + "step": 9154 + }, + { + "epoch": 0.6188319589022576, + "grad_norm": 0.5349517464637756, + "learning_rate": 1.0136899878531714e-05, + "loss": 0.09973907470703125, + "step": 9155 + }, + { + "epoch": 0.6188995538731918, + "grad_norm": 0.9899634122848511, + "learning_rate": 1.0133778685961985e-05, + "loss": 0.190887451171875, + "step": 9156 + }, + { + "epoch": 0.618967148844126, + "grad_norm": 0.5373696684837341, + "learning_rate": 1.0130657728850308e-05, + "loss": 0.0925750732421875, + "step": 9157 + }, + { + "epoch": 0.6190347438150602, + "grad_norm": 0.7440584301948547, + "learning_rate": 1.012753700734769e-05, + "loss": 0.10442733764648438, + "step": 9158 + }, + { + "epoch": 0.6191023387859943, + "grad_norm": 0.8341318964958191, + "learning_rate": 1.0124416521605137e-05, + "loss": 0.1734619140625, + "step": 9159 + }, + { + "epoch": 0.6191699337569285, + "grad_norm": 0.7222172021865845, + "learning_rate": 1.0121296271773628e-05, + "loss": 0.09704208374023438, + "step": 9160 + }, + { + "epoch": 0.6192375287278626, + "grad_norm": 1.2155314683914185, + "learning_rate": 1.0118176258004147e-05, + "loss": 0.1609344482421875, + "step": 9161 + }, + { + "epoch": 0.6193051236987968, + "grad_norm": 0.3191359043121338, + "learning_rate": 1.0115056480447664e-05, + "loss": 0.04341888427734375, + "step": 9162 + }, + { + "epoch": 0.619372718669731, + "grad_norm": 0.5842559337615967, + "learning_rate": 1.011193693925512e-05, + "loss": 0.11354827880859375, + "step": 9163 + }, + { + "epoch": 0.6194403136406651, + "grad_norm": 1.1106047630310059, + "learning_rate": 1.0108817634577469e-05, + "loss": 0.17211151123046875, + "step": 9164 + }, + { + "epoch": 0.6195079086115993, + "grad_norm": 0.34723201394081116, + "learning_rate": 1.0105698566565637e-05, + "loss": 0.0770263671875, + "step": 9165 + }, + { + "epoch": 0.6195755035825334, + "grad_norm": 0.2274944931268692, + "learning_rate": 1.010257973537055e-05, + "loss": 0.024835586547851562, + "step": 9166 + }, + { + "epoch": 0.6196430985534677, + "grad_norm": 0.6612932085990906, + "learning_rate": 1.0099461141143108e-05, + "loss": 0.0966339111328125, + "step": 9167 + }, + { + "epoch": 0.6197106935244018, + "grad_norm": 0.23810167610645294, + "learning_rate": 1.0096342784034213e-05, + "loss": 0.04146766662597656, + "step": 9168 + }, + { + "epoch": 0.6197782884953359, + "grad_norm": 0.6829577088356018, + "learning_rate": 1.0093224664194747e-05, + "loss": 0.1058349609375, + "step": 9169 + }, + { + "epoch": 0.6198458834662701, + "grad_norm": 0.692651093006134, + "learning_rate": 1.0090106781775593e-05, + "loss": 0.1423797607421875, + "step": 9170 + }, + { + "epoch": 0.6199134784372042, + "grad_norm": 0.3839079737663269, + "learning_rate": 1.0086989136927602e-05, + "loss": 0.0625457763671875, + "step": 9171 + }, + { + "epoch": 0.6199810734081385, + "grad_norm": 0.44399768114089966, + "learning_rate": 1.0083871729801637e-05, + "loss": 0.07523345947265625, + "step": 9172 + }, + { + "epoch": 0.6200486683790726, + "grad_norm": 0.20311880111694336, + "learning_rate": 1.0080754560548523e-05, + "loss": 0.02936553955078125, + "step": 9173 + }, + { + "epoch": 0.6201162633500068, + "grad_norm": 0.7572185397148132, + "learning_rate": 1.0077637629319098e-05, + "loss": 0.11870002746582031, + "step": 9174 + }, + { + "epoch": 0.6201838583209409, + "grad_norm": 0.8266907930374146, + "learning_rate": 1.0074520936264182e-05, + "loss": 0.1172027587890625, + "step": 9175 + }, + { + "epoch": 0.620251453291875, + "grad_norm": 0.3580428957939148, + "learning_rate": 1.0071404481534567e-05, + "loss": 0.05145263671875, + "step": 9176 + }, + { + "epoch": 0.6203190482628093, + "grad_norm": 0.9222142696380615, + "learning_rate": 1.0068288265281056e-05, + "loss": 0.11226654052734375, + "step": 9177 + }, + { + "epoch": 0.6203866432337434, + "grad_norm": 1.5755343437194824, + "learning_rate": 1.0065172287654425e-05, + "loss": 0.21563720703125, + "step": 9178 + }, + { + "epoch": 0.6204542382046776, + "grad_norm": 0.32506340742111206, + "learning_rate": 1.0062056548805456e-05, + "loss": 0.05998992919921875, + "step": 9179 + }, + { + "epoch": 0.6205218331756117, + "grad_norm": 0.6856978535652161, + "learning_rate": 1.0058941048884894e-05, + "loss": 0.11810302734375, + "step": 9180 + }, + { + "epoch": 0.6205894281465459, + "grad_norm": 1.0140691995620728, + "learning_rate": 1.0055825788043494e-05, + "loss": 0.1628875732421875, + "step": 9181 + }, + { + "epoch": 0.6206570231174801, + "grad_norm": 0.5185614824295044, + "learning_rate": 1.0052710766431987e-05, + "loss": 0.07019805908203125, + "step": 9182 + }, + { + "epoch": 0.6207246180884142, + "grad_norm": 0.36374610662460327, + "learning_rate": 1.0049595984201106e-05, + "loss": 0.091705322265625, + "step": 9183 + }, + { + "epoch": 0.6207922130593484, + "grad_norm": 0.7245438694953918, + "learning_rate": 1.0046481441501554e-05, + "loss": 0.12692642211914062, + "step": 9184 + }, + { + "epoch": 0.6208598080302825, + "grad_norm": 0.6777626276016235, + "learning_rate": 1.0043367138484036e-05, + "loss": 0.13189697265625, + "step": 9185 + }, + { + "epoch": 0.6209274030012167, + "grad_norm": 1.0042951107025146, + "learning_rate": 1.0040253075299244e-05, + "loss": 0.1264190673828125, + "step": 9186 + }, + { + "epoch": 0.6209949979721509, + "grad_norm": 0.8730651140213013, + "learning_rate": 1.0037139252097846e-05, + "loss": 0.1424560546875, + "step": 9187 + }, + { + "epoch": 0.6210625929430851, + "grad_norm": 0.7469245195388794, + "learning_rate": 1.0034025669030523e-05, + "loss": 0.1358795166015625, + "step": 9188 + }, + { + "epoch": 0.6211301879140192, + "grad_norm": 0.9985861778259277, + "learning_rate": 1.0030912326247913e-05, + "loss": 0.1576995849609375, + "step": 9189 + }, + { + "epoch": 0.6211977828849533, + "grad_norm": 1.0439287424087524, + "learning_rate": 1.002779922390067e-05, + "loss": 0.1438140869140625, + "step": 9190 + }, + { + "epoch": 0.6212653778558875, + "grad_norm": 0.18748019635677338, + "learning_rate": 1.0024686362139418e-05, + "loss": 0.024326324462890625, + "step": 9191 + }, + { + "epoch": 0.6213329728268217, + "grad_norm": 1.801612377166748, + "learning_rate": 1.002157374111479e-05, + "loss": 0.239227294921875, + "step": 9192 + }, + { + "epoch": 0.6214005677977559, + "grad_norm": 0.43550845980644226, + "learning_rate": 1.0018461360977377e-05, + "loss": 0.06902313232421875, + "step": 9193 + }, + { + "epoch": 0.62146816276869, + "grad_norm": 0.1501081883907318, + "learning_rate": 1.0015349221877788e-05, + "loss": 0.024227142333984375, + "step": 9194 + }, + { + "epoch": 0.6215357577396242, + "grad_norm": 0.3636987507343292, + "learning_rate": 1.0012237323966599e-05, + "loss": 0.0736846923828125, + "step": 9195 + }, + { + "epoch": 0.6216033527105583, + "grad_norm": 1.612775206565857, + "learning_rate": 1.0009125667394395e-05, + "loss": 0.18549346923828125, + "step": 9196 + }, + { + "epoch": 0.6216709476814924, + "grad_norm": 0.47531023621559143, + "learning_rate": 1.0006014252311723e-05, + "loss": 0.054630279541015625, + "step": 9197 + }, + { + "epoch": 0.6217385426524267, + "grad_norm": 0.3660336136817932, + "learning_rate": 1.0002903078869137e-05, + "loss": 0.06446075439453125, + "step": 9198 + }, + { + "epoch": 0.6218061376233608, + "grad_norm": 1.283100962638855, + "learning_rate": 9.999792147217183e-06, + "loss": 0.10633468627929688, + "step": 9199 + }, + { + "epoch": 0.621873732594295, + "grad_norm": 0.3075190782546997, + "learning_rate": 9.996681457506373e-06, + "loss": 0.053985595703125, + "step": 9200 + }, + { + "epoch": 0.6219413275652291, + "grad_norm": 0.3631341755390167, + "learning_rate": 9.993571009887232e-06, + "loss": 0.06775665283203125, + "step": 9201 + }, + { + "epoch": 0.6220089225361634, + "grad_norm": 1.0229030847549438, + "learning_rate": 9.99046080451026e-06, + "loss": 0.216064453125, + "step": 9202 + }, + { + "epoch": 0.6220765175070975, + "grad_norm": 0.8578897714614868, + "learning_rate": 9.987350841525954e-06, + "loss": 0.0982522964477539, + "step": 9203 + }, + { + "epoch": 0.6221441124780316, + "grad_norm": 0.9103871583938599, + "learning_rate": 9.98424112108478e-06, + "loss": 0.15399169921875, + "step": 9204 + }, + { + "epoch": 0.6222117074489658, + "grad_norm": 0.3376079797744751, + "learning_rate": 9.981131643337219e-06, + "loss": 0.06479644775390625, + "step": 9205 + }, + { + "epoch": 0.6222793024198999, + "grad_norm": 0.4941045939922333, + "learning_rate": 9.978022408433716e-06, + "loss": 0.07967376708984375, + "step": 9206 + }, + { + "epoch": 0.6223468973908342, + "grad_norm": 0.1802889108657837, + "learning_rate": 9.974913416524725e-06, + "loss": 0.0194091796875, + "step": 9207 + }, + { + "epoch": 0.6224144923617683, + "grad_norm": 0.7169668078422546, + "learning_rate": 9.971804667760668e-06, + "loss": 0.143585205078125, + "step": 9208 + }, + { + "epoch": 0.6224820873327025, + "grad_norm": 0.5084713101387024, + "learning_rate": 9.968696162291978e-06, + "loss": 0.08245849609375, + "step": 9209 + }, + { + "epoch": 0.6225496823036366, + "grad_norm": 0.3780023753643036, + "learning_rate": 9.965587900269054e-06, + "loss": 0.072479248046875, + "step": 9210 + }, + { + "epoch": 0.6226172772745707, + "grad_norm": 0.3181990087032318, + "learning_rate": 9.962479881842292e-06, + "loss": 0.041534423828125, + "step": 9211 + }, + { + "epoch": 0.622684872245505, + "grad_norm": 0.8019822835922241, + "learning_rate": 9.959372107162089e-06, + "loss": 0.1256256103515625, + "step": 9212 + }, + { + "epoch": 0.6227524672164391, + "grad_norm": 1.0378060340881348, + "learning_rate": 9.956264576378805e-06, + "loss": 0.16656494140625, + "step": 9213 + }, + { + "epoch": 0.6228200621873733, + "grad_norm": 1.1160064935684204, + "learning_rate": 9.953157289642808e-06, + "loss": 0.216705322265625, + "step": 9214 + }, + { + "epoch": 0.6228876571583074, + "grad_norm": 0.9296790361404419, + "learning_rate": 9.950050247104447e-06, + "loss": 0.134124755859375, + "step": 9215 + }, + { + "epoch": 0.6229552521292416, + "grad_norm": 1.0063430070877075, + "learning_rate": 9.946943448914064e-06, + "loss": 0.1363391876220703, + "step": 9216 + }, + { + "epoch": 0.6230228471001757, + "grad_norm": 0.4298604130744934, + "learning_rate": 9.943836895221978e-06, + "loss": 0.08634185791015625, + "step": 9217 + }, + { + "epoch": 0.6230904420711099, + "grad_norm": 0.3203783333301544, + "learning_rate": 9.940730586178512e-06, + "loss": 0.05470466613769531, + "step": 9218 + }, + { + "epoch": 0.6231580370420441, + "grad_norm": 0.5689027309417725, + "learning_rate": 9.937624521933955e-06, + "loss": 0.12604141235351562, + "step": 9219 + }, + { + "epoch": 0.6232256320129782, + "grad_norm": 0.456808477640152, + "learning_rate": 9.934518702638613e-06, + "loss": 0.09067535400390625, + "step": 9220 + }, + { + "epoch": 0.6232932269839124, + "grad_norm": 1.5567677021026611, + "learning_rate": 9.93141312844276e-06, + "loss": 0.16020965576171875, + "step": 9221 + }, + { + "epoch": 0.6233608219548465, + "grad_norm": 0.9186071753501892, + "learning_rate": 9.928307799496654e-06, + "loss": 0.171600341796875, + "step": 9222 + }, + { + "epoch": 0.6234284169257808, + "grad_norm": 0.40504536032676697, + "learning_rate": 9.925202715950562e-06, + "loss": 0.07186126708984375, + "step": 9223 + }, + { + "epoch": 0.6234960118967149, + "grad_norm": 0.2341541349887848, + "learning_rate": 9.922097877954718e-06, + "loss": 0.047393798828125, + "step": 9224 + }, + { + "epoch": 0.623563606867649, + "grad_norm": 1.2788004875183105, + "learning_rate": 9.918993285659363e-06, + "loss": 0.21240234375, + "step": 9225 + }, + { + "epoch": 0.6236312018385832, + "grad_norm": 0.2673114538192749, + "learning_rate": 9.915888939214706e-06, + "loss": 0.06252288818359375, + "step": 9226 + }, + { + "epoch": 0.6236987968095173, + "grad_norm": 0.3301141560077667, + "learning_rate": 9.912784838770965e-06, + "loss": 0.049663543701171875, + "step": 9227 + }, + { + "epoch": 0.6237663917804516, + "grad_norm": 1.1200670003890991, + "learning_rate": 9.909680984478324e-06, + "loss": 0.191436767578125, + "step": 9228 + }, + { + "epoch": 0.6238339867513857, + "grad_norm": 0.4959685802459717, + "learning_rate": 9.906577376486982e-06, + "loss": 0.07834625244140625, + "step": 9229 + }, + { + "epoch": 0.6239015817223199, + "grad_norm": 0.25218433141708374, + "learning_rate": 9.903474014947094e-06, + "loss": 0.053012847900390625, + "step": 9230 + }, + { + "epoch": 0.623969176693254, + "grad_norm": 1.076664924621582, + "learning_rate": 9.900370900008831e-06, + "loss": 0.14031219482421875, + "step": 9231 + }, + { + "epoch": 0.6240367716641881, + "grad_norm": 1.2529301643371582, + "learning_rate": 9.897268031822343e-06, + "loss": 0.165985107421875, + "step": 9232 + }, + { + "epoch": 0.6241043666351224, + "grad_norm": 0.16538691520690918, + "learning_rate": 9.894165410537757e-06, + "loss": 0.029052734375, + "step": 9233 + }, + { + "epoch": 0.6241719616060565, + "grad_norm": 0.2738566994667053, + "learning_rate": 9.891063036305202e-06, + "loss": 0.035121917724609375, + "step": 9234 + }, + { + "epoch": 0.6242395565769907, + "grad_norm": 0.32438555359840393, + "learning_rate": 9.887960909274786e-06, + "loss": 0.0614013671875, + "step": 9235 + }, + { + "epoch": 0.6243071515479248, + "grad_norm": 0.784261167049408, + "learning_rate": 9.884859029596617e-06, + "loss": 0.13205718994140625, + "step": 9236 + }, + { + "epoch": 0.624374746518859, + "grad_norm": 0.7687101364135742, + "learning_rate": 9.881757397420776e-06, + "loss": 0.13845062255859375, + "step": 9237 + }, + { + "epoch": 0.6244423414897932, + "grad_norm": 0.1852072775363922, + "learning_rate": 9.878656012897346e-06, + "loss": 0.031783103942871094, + "step": 9238 + }, + { + "epoch": 0.6245099364607273, + "grad_norm": 0.7378827333450317, + "learning_rate": 9.875554876176381e-06, + "loss": 0.10522079467773438, + "step": 9239 + }, + { + "epoch": 0.6245775314316615, + "grad_norm": 0.8993417024612427, + "learning_rate": 9.872453987407945e-06, + "loss": 0.19036865234375, + "step": 9240 + }, + { + "epoch": 0.6246451264025956, + "grad_norm": 1.0836561918258667, + "learning_rate": 9.869353346742068e-06, + "loss": 0.212890625, + "step": 9241 + }, + { + "epoch": 0.6247127213735298, + "grad_norm": 0.6964963674545288, + "learning_rate": 9.866252954328792e-06, + "loss": 0.135589599609375, + "step": 9242 + }, + { + "epoch": 0.624780316344464, + "grad_norm": 0.36994388699531555, + "learning_rate": 9.863152810318115e-06, + "loss": 0.07123565673828125, + "step": 9243 + }, + { + "epoch": 0.6248479113153982, + "grad_norm": 0.5700315833091736, + "learning_rate": 9.860052914860057e-06, + "loss": 0.1273956298828125, + "step": 9244 + }, + { + "epoch": 0.6249155062863323, + "grad_norm": 0.6927855610847473, + "learning_rate": 9.856953268104605e-06, + "loss": 0.11518096923828125, + "step": 9245 + }, + { + "epoch": 0.6249831012572664, + "grad_norm": 0.556553304195404, + "learning_rate": 9.853853870201731e-06, + "loss": 0.08336639404296875, + "step": 9246 + }, + { + "epoch": 0.6250506962282006, + "grad_norm": 0.693833589553833, + "learning_rate": 9.85075472130142e-06, + "loss": 0.12900543212890625, + "step": 9247 + }, + { + "epoch": 0.6251182911991348, + "grad_norm": 0.8250555992126465, + "learning_rate": 9.847655821553608e-06, + "loss": 0.120391845703125, + "step": 9248 + }, + { + "epoch": 0.625185886170069, + "grad_norm": 0.6980171799659729, + "learning_rate": 9.844557171108256e-06, + "loss": 0.166778564453125, + "step": 9249 + }, + { + "epoch": 0.6252534811410031, + "grad_norm": 0.23969857394695282, + "learning_rate": 9.841458770115285e-06, + "loss": 0.0359039306640625, + "step": 9250 + }, + { + "epoch": 0.6253210761119373, + "grad_norm": 0.4480706453323364, + "learning_rate": 9.838360618724626e-06, + "loss": 0.08100128173828125, + "step": 9251 + }, + { + "epoch": 0.6253886710828714, + "grad_norm": 0.33356615900993347, + "learning_rate": 9.835262717086174e-06, + "loss": 0.05573272705078125, + "step": 9252 + }, + { + "epoch": 0.6254562660538056, + "grad_norm": 0.7009574770927429, + "learning_rate": 9.832165065349832e-06, + "loss": 0.102325439453125, + "step": 9253 + }, + { + "epoch": 0.6255238610247398, + "grad_norm": 0.28693994879722595, + "learning_rate": 9.829067663665481e-06, + "loss": 0.03259086608886719, + "step": 9254 + }, + { + "epoch": 0.6255914559956739, + "grad_norm": 0.38170236349105835, + "learning_rate": 9.825970512183002e-06, + "loss": 0.0667572021484375, + "step": 9255 + }, + { + "epoch": 0.6256590509666081, + "grad_norm": 1.134450912475586, + "learning_rate": 9.82287361105224e-06, + "loss": 0.190032958984375, + "step": 9256 + }, + { + "epoch": 0.6257266459375422, + "grad_norm": 0.6965985298156738, + "learning_rate": 9.819776960423049e-06, + "loss": 0.1178741455078125, + "step": 9257 + }, + { + "epoch": 0.6257942409084765, + "grad_norm": 0.5181556940078735, + "learning_rate": 9.81668056044527e-06, + "loss": 0.1326446533203125, + "step": 9258 + }, + { + "epoch": 0.6258618358794106, + "grad_norm": 0.3215530812740326, + "learning_rate": 9.813584411268711e-06, + "loss": 0.05971336364746094, + "step": 9259 + }, + { + "epoch": 0.6259294308503447, + "grad_norm": 1.0060579776763916, + "learning_rate": 9.810488513043197e-06, + "loss": 0.184478759765625, + "step": 9260 + }, + { + "epoch": 0.6259970258212789, + "grad_norm": 0.9590877294540405, + "learning_rate": 9.80739286591852e-06, + "loss": 0.118896484375, + "step": 9261 + }, + { + "epoch": 0.626064620792213, + "grad_norm": 0.8134473562240601, + "learning_rate": 9.804297470044473e-06, + "loss": 0.13788604736328125, + "step": 9262 + }, + { + "epoch": 0.6261322157631473, + "grad_norm": 0.35079798102378845, + "learning_rate": 9.80120232557082e-06, + "loss": 0.08258819580078125, + "step": 9263 + }, + { + "epoch": 0.6261998107340814, + "grad_norm": 0.2753715217113495, + "learning_rate": 9.798107432647337e-06, + "loss": 0.05448150634765625, + "step": 9264 + }, + { + "epoch": 0.6262674057050156, + "grad_norm": 0.9716290831565857, + "learning_rate": 9.79501279142376e-06, + "loss": 0.1148681640625, + "step": 9265 + }, + { + "epoch": 0.6263350006759497, + "grad_norm": 0.8092653155326843, + "learning_rate": 9.791918402049838e-06, + "loss": 0.1768646240234375, + "step": 9266 + }, + { + "epoch": 0.6264025956468838, + "grad_norm": 0.7073147892951965, + "learning_rate": 9.788824264675293e-06, + "loss": 0.11389541625976562, + "step": 9267 + }, + { + "epoch": 0.6264701906178181, + "grad_norm": 0.3391081988811493, + "learning_rate": 9.785730379449832e-06, + "loss": 0.055484771728515625, + "step": 9268 + }, + { + "epoch": 0.6265377855887522, + "grad_norm": 0.562018871307373, + "learning_rate": 9.782636746523168e-06, + "loss": 0.09228515625, + "step": 9269 + }, + { + "epoch": 0.6266053805596864, + "grad_norm": 0.4825170338153839, + "learning_rate": 9.77954336604498e-06, + "loss": 0.07281875610351562, + "step": 9270 + }, + { + "epoch": 0.6266729755306205, + "grad_norm": 0.2769322097301483, + "learning_rate": 9.776450238164955e-06, + "loss": 0.054477691650390625, + "step": 9271 + }, + { + "epoch": 0.6267405705015547, + "grad_norm": 0.3389997184276581, + "learning_rate": 9.773357363032745e-06, + "loss": 0.043384552001953125, + "step": 9272 + }, + { + "epoch": 0.6268081654724889, + "grad_norm": 1.0510296821594238, + "learning_rate": 9.770264740798018e-06, + "loss": 0.102935791015625, + "step": 9273 + }, + { + "epoch": 0.626875760443423, + "grad_norm": 0.6334699988365173, + "learning_rate": 9.767172371610398e-06, + "loss": 0.13421630859375, + "step": 9274 + }, + { + "epoch": 0.6269433554143572, + "grad_norm": 0.27215319871902466, + "learning_rate": 9.764080255619531e-06, + "loss": 0.0517578125, + "step": 9275 + }, + { + "epoch": 0.6270109503852913, + "grad_norm": 0.6575352549552917, + "learning_rate": 9.760988392975014e-06, + "loss": 0.1229400634765625, + "step": 9276 + }, + { + "epoch": 0.6270785453562255, + "grad_norm": 0.5767965912818909, + "learning_rate": 9.757896783826465e-06, + "loss": 0.097442626953125, + "step": 9277 + }, + { + "epoch": 0.6271461403271597, + "grad_norm": 0.3249448835849762, + "learning_rate": 9.754805428323466e-06, + "loss": 0.05950927734375, + "step": 9278 + }, + { + "epoch": 0.6272137352980938, + "grad_norm": 0.6886587738990784, + "learning_rate": 9.751714326615605e-06, + "loss": 0.11138916015625, + "step": 9279 + }, + { + "epoch": 0.627281330269028, + "grad_norm": 0.6058802008628845, + "learning_rate": 9.748623478852445e-06, + "loss": 0.1153717041015625, + "step": 9280 + }, + { + "epoch": 0.6273489252399621, + "grad_norm": 0.3062431216239929, + "learning_rate": 9.745532885183532e-06, + "loss": 0.05303192138671875, + "step": 9281 + }, + { + "epoch": 0.6274165202108963, + "grad_norm": 0.2728933095932007, + "learning_rate": 9.742442545758419e-06, + "loss": 0.050384521484375, + "step": 9282 + }, + { + "epoch": 0.6274841151818304, + "grad_norm": 0.3102063536643982, + "learning_rate": 9.73935246072663e-06, + "loss": 0.054347991943359375, + "step": 9283 + }, + { + "epoch": 0.6275517101527647, + "grad_norm": 0.3205401301383972, + "learning_rate": 9.73626263023769e-06, + "loss": 0.06283187866210938, + "step": 9284 + }, + { + "epoch": 0.6276193051236988, + "grad_norm": 1.471308946609497, + "learning_rate": 9.73317305444109e-06, + "loss": 0.12025928497314453, + "step": 9285 + }, + { + "epoch": 0.6276869000946329, + "grad_norm": 1.1843868494033813, + "learning_rate": 9.73008373348634e-06, + "loss": 0.136444091796875, + "step": 9286 + }, + { + "epoch": 0.6277544950655671, + "grad_norm": 0.7053358554840088, + "learning_rate": 9.726994667522905e-06, + "loss": 0.131927490234375, + "step": 9287 + }, + { + "epoch": 0.6278220900365012, + "grad_norm": 0.38643893599510193, + "learning_rate": 9.723905856700265e-06, + "loss": 0.03803062438964844, + "step": 9288 + }, + { + "epoch": 0.6278896850074355, + "grad_norm": 0.7025181651115417, + "learning_rate": 9.720817301167869e-06, + "loss": 0.08130264282226562, + "step": 9289 + }, + { + "epoch": 0.6279572799783696, + "grad_norm": 0.3170574903488159, + "learning_rate": 9.717729001075165e-06, + "loss": 0.0268402099609375, + "step": 9290 + }, + { + "epoch": 0.6280248749493038, + "grad_norm": 1.6199069023132324, + "learning_rate": 9.714640956571585e-06, + "loss": 0.243011474609375, + "step": 9291 + }, + { + "epoch": 0.6280924699202379, + "grad_norm": 0.6456925272941589, + "learning_rate": 9.711553167806538e-06, + "loss": 0.1260528564453125, + "step": 9292 + }, + { + "epoch": 0.628160064891172, + "grad_norm": 0.687814474105835, + "learning_rate": 9.708465634929444e-06, + "loss": 0.0608978271484375, + "step": 9293 + }, + { + "epoch": 0.6282276598621063, + "grad_norm": 0.47486552596092224, + "learning_rate": 9.70537835808968e-06, + "loss": 0.070037841796875, + "step": 9294 + }, + { + "epoch": 0.6282952548330404, + "grad_norm": 0.766139030456543, + "learning_rate": 9.70229133743665e-06, + "loss": 0.1125946044921875, + "step": 9295 + }, + { + "epoch": 0.6283628498039746, + "grad_norm": 0.2986295521259308, + "learning_rate": 9.699204573119702e-06, + "loss": 0.05063629150390625, + "step": 9296 + }, + { + "epoch": 0.6284304447749087, + "grad_norm": 0.8275061249732971, + "learning_rate": 9.696118065288211e-06, + "loss": 0.11688232421875, + "step": 9297 + }, + { + "epoch": 0.628498039745843, + "grad_norm": 0.44494059681892395, + "learning_rate": 9.693031814091504e-06, + "loss": 0.10604095458984375, + "step": 9298 + }, + { + "epoch": 0.6285656347167771, + "grad_norm": 0.22445106506347656, + "learning_rate": 9.689945819678924e-06, + "loss": 0.040973663330078125, + "step": 9299 + }, + { + "epoch": 0.6286332296877112, + "grad_norm": 1.044467568397522, + "learning_rate": 9.686860082199786e-06, + "loss": 0.193756103515625, + "step": 9300 + }, + { + "epoch": 0.6287008246586454, + "grad_norm": 0.7305492162704468, + "learning_rate": 9.683774601803405e-06, + "loss": 0.1359405517578125, + "step": 9301 + }, + { + "epoch": 0.6287684196295795, + "grad_norm": 0.3086448907852173, + "learning_rate": 9.68068937863907e-06, + "loss": 0.041454315185546875, + "step": 9302 + }, + { + "epoch": 0.6288360146005137, + "grad_norm": 0.37700918316841125, + "learning_rate": 9.677604412856059e-06, + "loss": 0.07669448852539062, + "step": 9303 + }, + { + "epoch": 0.6289036095714479, + "grad_norm": 0.6644365191459656, + "learning_rate": 9.67451970460365e-06, + "loss": 0.1409454345703125, + "step": 9304 + }, + { + "epoch": 0.6289712045423821, + "grad_norm": 0.33921295404434204, + "learning_rate": 9.67143525403109e-06, + "loss": 0.06298828125, + "step": 9305 + }, + { + "epoch": 0.6290387995133162, + "grad_norm": 0.5584511756896973, + "learning_rate": 9.668351061287634e-06, + "loss": 0.1071014404296875, + "step": 9306 + }, + { + "epoch": 0.6291063944842503, + "grad_norm": 0.2572135031223297, + "learning_rate": 9.665267126522511e-06, + "loss": 0.03353118896484375, + "step": 9307 + }, + { + "epoch": 0.6291739894551845, + "grad_norm": 0.6015111804008484, + "learning_rate": 9.662183449884945e-06, + "loss": 0.1402587890625, + "step": 9308 + }, + { + "epoch": 0.6292415844261187, + "grad_norm": 1.0423682928085327, + "learning_rate": 9.659100031524132e-06, + "loss": 0.1728515625, + "step": 9309 + }, + { + "epoch": 0.6293091793970529, + "grad_norm": 0.5987850427627563, + "learning_rate": 9.656016871589282e-06, + "loss": 0.10235595703125, + "step": 9310 + }, + { + "epoch": 0.629376774367987, + "grad_norm": 0.8306840062141418, + "learning_rate": 9.652933970229562e-06, + "loss": 0.190093994140625, + "step": 9311 + }, + { + "epoch": 0.6294443693389212, + "grad_norm": 0.2628072500228882, + "learning_rate": 9.649851327594153e-06, + "loss": 0.058441162109375, + "step": 9312 + }, + { + "epoch": 0.6295119643098553, + "grad_norm": 0.7278566360473633, + "learning_rate": 9.646768943832204e-06, + "loss": 0.16943359375, + "step": 9313 + }, + { + "epoch": 0.6295795592807895, + "grad_norm": 0.3984605073928833, + "learning_rate": 9.643686819092877e-06, + "loss": 0.0543670654296875, + "step": 9314 + }, + { + "epoch": 0.6296471542517237, + "grad_norm": 0.9071235060691833, + "learning_rate": 9.640604953525283e-06, + "loss": 0.11537933349609375, + "step": 9315 + }, + { + "epoch": 0.6297147492226578, + "grad_norm": 0.37194013595581055, + "learning_rate": 9.63752334727855e-06, + "loss": 0.0477142333984375, + "step": 9316 + }, + { + "epoch": 0.629782344193592, + "grad_norm": 0.2385352998971939, + "learning_rate": 9.634442000501795e-06, + "loss": 0.0525360107421875, + "step": 9317 + }, + { + "epoch": 0.6298499391645261, + "grad_norm": 0.5095500946044922, + "learning_rate": 9.631360913344095e-06, + "loss": 0.09954833984375, + "step": 9318 + }, + { + "epoch": 0.6299175341354604, + "grad_norm": 0.3908403813838959, + "learning_rate": 9.628280085954545e-06, + "loss": 0.0789642333984375, + "step": 9319 + }, + { + "epoch": 0.6299851291063945, + "grad_norm": 0.6979023814201355, + "learning_rate": 9.625199518482207e-06, + "loss": 0.12682342529296875, + "step": 9320 + }, + { + "epoch": 0.6300527240773286, + "grad_norm": 0.310932457447052, + "learning_rate": 9.62211921107615e-06, + "loss": 0.0563201904296875, + "step": 9321 + }, + { + "epoch": 0.6301203190482628, + "grad_norm": 0.4004381000995636, + "learning_rate": 9.6190391638854e-06, + "loss": 0.0854644775390625, + "step": 9322 + }, + { + "epoch": 0.6301879140191969, + "grad_norm": 0.46169546246528625, + "learning_rate": 9.615959377059005e-06, + "loss": 0.1065521240234375, + "step": 9323 + }, + { + "epoch": 0.6302555089901312, + "grad_norm": 0.3830135762691498, + "learning_rate": 9.612879850745977e-06, + "loss": 0.07323455810546875, + "step": 9324 + }, + { + "epoch": 0.6303231039610653, + "grad_norm": 0.8561091423034668, + "learning_rate": 9.609800585095329e-06, + "loss": 0.11750030517578125, + "step": 9325 + }, + { + "epoch": 0.6303906989319995, + "grad_norm": 0.3015992045402527, + "learning_rate": 9.606721580256048e-06, + "loss": 0.0445404052734375, + "step": 9326 + }, + { + "epoch": 0.6304582939029336, + "grad_norm": 0.35589390993118286, + "learning_rate": 9.603642836377112e-06, + "loss": 0.06194305419921875, + "step": 9327 + }, + { + "epoch": 0.6305258888738677, + "grad_norm": 0.341098815202713, + "learning_rate": 9.600564353607498e-06, + "loss": 0.0669708251953125, + "step": 9328 + }, + { + "epoch": 0.630593483844802, + "grad_norm": 0.37803909182548523, + "learning_rate": 9.597486132096158e-06, + "loss": 0.08277130126953125, + "step": 9329 + }, + { + "epoch": 0.6306610788157361, + "grad_norm": 0.5996582508087158, + "learning_rate": 9.594408171992042e-06, + "loss": 0.097137451171875, + "step": 9330 + }, + { + "epoch": 0.6307286737866703, + "grad_norm": 0.6589090824127197, + "learning_rate": 9.59133047344407e-06, + "loss": 0.1624755859375, + "step": 9331 + }, + { + "epoch": 0.6307962687576044, + "grad_norm": 0.24884919822216034, + "learning_rate": 9.588253036601169e-06, + "loss": 0.03299903869628906, + "step": 9332 + }, + { + "epoch": 0.6308638637285386, + "grad_norm": 0.44936835765838623, + "learning_rate": 9.585175861612237e-06, + "loss": 0.06825828552246094, + "step": 9333 + }, + { + "epoch": 0.6309314586994728, + "grad_norm": 0.7767287492752075, + "learning_rate": 9.582098948626176e-06, + "loss": 0.116058349609375, + "step": 9334 + }, + { + "epoch": 0.6309990536704069, + "grad_norm": 0.6532617807388306, + "learning_rate": 9.579022297791859e-06, + "loss": 0.1363677978515625, + "step": 9335 + }, + { + "epoch": 0.6310666486413411, + "grad_norm": 0.34955495595932007, + "learning_rate": 9.575945909258157e-06, + "loss": 0.06111907958984375, + "step": 9336 + }, + { + "epoch": 0.6311342436122752, + "grad_norm": 1.0097870826721191, + "learning_rate": 9.572869783173929e-06, + "loss": 0.13135147094726562, + "step": 9337 + }, + { + "epoch": 0.6312018385832094, + "grad_norm": 0.7147176861763, + "learning_rate": 9.569793919688003e-06, + "loss": 0.16754150390625, + "step": 9338 + }, + { + "epoch": 0.6312694335541436, + "grad_norm": 1.0577701330184937, + "learning_rate": 9.566718318949225e-06, + "loss": 0.14560699462890625, + "step": 9339 + }, + { + "epoch": 0.6313370285250778, + "grad_norm": 0.6748275756835938, + "learning_rate": 9.563642981106395e-06, + "loss": 0.10924911499023438, + "step": 9340 + }, + { + "epoch": 0.6314046234960119, + "grad_norm": 0.19399835169315338, + "learning_rate": 9.560567906308333e-06, + "loss": 0.02912139892578125, + "step": 9341 + }, + { + "epoch": 0.631472218466946, + "grad_norm": 0.4087041914463043, + "learning_rate": 9.557493094703817e-06, + "loss": 0.0886077880859375, + "step": 9342 + }, + { + "epoch": 0.6315398134378802, + "grad_norm": 0.5994306802749634, + "learning_rate": 9.55441854644164e-06, + "loss": 0.11031913757324219, + "step": 9343 + }, + { + "epoch": 0.6316074084088144, + "grad_norm": 1.1015774011611938, + "learning_rate": 9.551344261670551e-06, + "loss": 0.15301132202148438, + "step": 9344 + }, + { + "epoch": 0.6316750033797486, + "grad_norm": 0.7140371799468994, + "learning_rate": 9.548270240539318e-06, + "loss": 0.127288818359375, + "step": 9345 + }, + { + "epoch": 0.6317425983506827, + "grad_norm": 0.6310524940490723, + "learning_rate": 9.545196483196667e-06, + "loss": 0.112213134765625, + "step": 9346 + }, + { + "epoch": 0.6318101933216169, + "grad_norm": 0.7604056596755981, + "learning_rate": 9.542122989791343e-06, + "loss": 0.1353302001953125, + "step": 9347 + }, + { + "epoch": 0.631877788292551, + "grad_norm": 0.4984759986400604, + "learning_rate": 9.539049760472044e-06, + "loss": 0.08568572998046875, + "step": 9348 + }, + { + "epoch": 0.6319453832634851, + "grad_norm": 0.8218205571174622, + "learning_rate": 9.53597679538748e-06, + "loss": 0.0951995849609375, + "step": 9349 + }, + { + "epoch": 0.6320129782344194, + "grad_norm": 0.799881100654602, + "learning_rate": 9.532904094686345e-06, + "loss": 0.1189727783203125, + "step": 9350 + }, + { + "epoch": 0.6320805732053535, + "grad_norm": 0.3471709191799164, + "learning_rate": 9.529831658517301e-06, + "loss": 0.051021575927734375, + "step": 9351 + }, + { + "epoch": 0.6321481681762877, + "grad_norm": 0.22034262120723724, + "learning_rate": 9.526759487029025e-06, + "loss": 0.03784370422363281, + "step": 9352 + }, + { + "epoch": 0.6322157631472218, + "grad_norm": 0.6875259280204773, + "learning_rate": 9.523687580370158e-06, + "loss": 0.1365966796875, + "step": 9353 + }, + { + "epoch": 0.6322833581181561, + "grad_norm": 1.0999195575714111, + "learning_rate": 9.52061593868935e-06, + "loss": 0.1874542236328125, + "step": 9354 + }, + { + "epoch": 0.6323509530890902, + "grad_norm": 0.8202097415924072, + "learning_rate": 9.51754456213521e-06, + "loss": 0.13714599609375, + "step": 9355 + }, + { + "epoch": 0.6324185480600243, + "grad_norm": 0.6170259118080139, + "learning_rate": 9.514473450856367e-06, + "loss": 0.08660507202148438, + "step": 9356 + }, + { + "epoch": 0.6324861430309585, + "grad_norm": 0.4490700662136078, + "learning_rate": 9.511402605001408e-06, + "loss": 0.1068267822265625, + "step": 9357 + }, + { + "epoch": 0.6325537380018926, + "grad_norm": 0.773227870464325, + "learning_rate": 9.508332024718926e-06, + "loss": 0.1442413330078125, + "step": 9358 + }, + { + "epoch": 0.6326213329728269, + "grad_norm": 0.6599299311637878, + "learning_rate": 9.50526171015749e-06, + "loss": 0.1514434814453125, + "step": 9359 + }, + { + "epoch": 0.632688927943761, + "grad_norm": 0.49437856674194336, + "learning_rate": 9.50219166146567e-06, + "loss": 0.1028900146484375, + "step": 9360 + }, + { + "epoch": 0.6327565229146952, + "grad_norm": 1.242960810661316, + "learning_rate": 9.499121878792005e-06, + "loss": 0.2318115234375, + "step": 9361 + }, + { + "epoch": 0.6328241178856293, + "grad_norm": 0.8261963129043579, + "learning_rate": 9.496052362285028e-06, + "loss": 0.1524810791015625, + "step": 9362 + }, + { + "epoch": 0.6328917128565634, + "grad_norm": 0.6155471205711365, + "learning_rate": 9.492983112093276e-06, + "loss": 0.0963592529296875, + "step": 9363 + }, + { + "epoch": 0.6329593078274977, + "grad_norm": 0.4460614323616028, + "learning_rate": 9.489914128365241e-06, + "loss": 0.08557510375976562, + "step": 9364 + }, + { + "epoch": 0.6330269027984318, + "grad_norm": 0.7107172012329102, + "learning_rate": 9.486845411249431e-06, + "loss": 0.1386566162109375, + "step": 9365 + }, + { + "epoch": 0.633094497769366, + "grad_norm": 0.2273503988981247, + "learning_rate": 9.483776960894321e-06, + "loss": 0.048099517822265625, + "step": 9366 + }, + { + "epoch": 0.6331620927403001, + "grad_norm": 0.9988332390785217, + "learning_rate": 9.480708777448395e-06, + "loss": 0.1495819091796875, + "step": 9367 + }, + { + "epoch": 0.6332296877112343, + "grad_norm": 0.6378806829452515, + "learning_rate": 9.477640861060096e-06, + "loss": 0.11721038818359375, + "step": 9368 + }, + { + "epoch": 0.6332972826821684, + "grad_norm": 1.0780367851257324, + "learning_rate": 9.474573211877875e-06, + "loss": 0.1666412353515625, + "step": 9369 + }, + { + "epoch": 0.6333648776531026, + "grad_norm": 0.8749330639839172, + "learning_rate": 9.471505830050165e-06, + "loss": 0.12856674194335938, + "step": 9370 + }, + { + "epoch": 0.6334324726240368, + "grad_norm": 0.30097633600234985, + "learning_rate": 9.46843871572539e-06, + "loss": 0.044742584228515625, + "step": 9371 + }, + { + "epoch": 0.6335000675949709, + "grad_norm": 0.29250243306159973, + "learning_rate": 9.465371869051941e-06, + "loss": 0.04187583923339844, + "step": 9372 + }, + { + "epoch": 0.6335676625659051, + "grad_norm": 0.2517167627811432, + "learning_rate": 9.462305290178232e-06, + "loss": 0.0501708984375, + "step": 9373 + }, + { + "epoch": 0.6336352575368392, + "grad_norm": 1.4121164083480835, + "learning_rate": 9.459238979252625e-06, + "loss": 0.217254638671875, + "step": 9374 + }, + { + "epoch": 0.6337028525077735, + "grad_norm": 0.41078099608421326, + "learning_rate": 9.456172936423493e-06, + "loss": 0.0837554931640625, + "step": 9375 + }, + { + "epoch": 0.6337704474787076, + "grad_norm": 0.5567744374275208, + "learning_rate": 9.453107161839194e-06, + "loss": 0.13726806640625, + "step": 9376 + }, + { + "epoch": 0.6338380424496417, + "grad_norm": 0.7477143406867981, + "learning_rate": 9.450041655648063e-06, + "loss": 0.129638671875, + "step": 9377 + }, + { + "epoch": 0.6339056374205759, + "grad_norm": 0.3663333058357239, + "learning_rate": 9.446976417998432e-06, + "loss": 0.05798149108886719, + "step": 9378 + }, + { + "epoch": 0.63397323239151, + "grad_norm": 0.4716343879699707, + "learning_rate": 9.443911449038614e-06, + "loss": 0.0598297119140625, + "step": 9379 + }, + { + "epoch": 0.6340408273624443, + "grad_norm": 0.40581974387168884, + "learning_rate": 9.44084674891692e-06, + "loss": 0.06139373779296875, + "step": 9380 + }, + { + "epoch": 0.6341084223333784, + "grad_norm": 0.8862906694412231, + "learning_rate": 9.437782317781626e-06, + "loss": 0.1800537109375, + "step": 9381 + }, + { + "epoch": 0.6341760173043126, + "grad_norm": 0.26772984862327576, + "learning_rate": 9.434718155781016e-06, + "loss": 0.0434722900390625, + "step": 9382 + }, + { + "epoch": 0.6342436122752467, + "grad_norm": 0.6356372833251953, + "learning_rate": 9.43165426306335e-06, + "loss": 0.1034698486328125, + "step": 9383 + }, + { + "epoch": 0.6343112072461808, + "grad_norm": 0.5631240010261536, + "learning_rate": 9.428590639776884e-06, + "loss": 0.0918121337890625, + "step": 9384 + }, + { + "epoch": 0.6343788022171151, + "grad_norm": 0.7909572124481201, + "learning_rate": 9.425527286069856e-06, + "loss": 0.11646270751953125, + "step": 9385 + }, + { + "epoch": 0.6344463971880492, + "grad_norm": 0.4673410952091217, + "learning_rate": 9.422464202090475e-06, + "loss": 0.106292724609375, + "step": 9386 + }, + { + "epoch": 0.6345139921589834, + "grad_norm": 0.2539661228656769, + "learning_rate": 9.419401387986965e-06, + "loss": 0.046924591064453125, + "step": 9387 + }, + { + "epoch": 0.6345815871299175, + "grad_norm": 0.46068069338798523, + "learning_rate": 9.416338843907518e-06, + "loss": 0.08137893676757812, + "step": 9388 + }, + { + "epoch": 0.6346491821008517, + "grad_norm": 0.5395643711090088, + "learning_rate": 9.413276570000332e-06, + "loss": 0.0863037109375, + "step": 9389 + }, + { + "epoch": 0.6347167770717859, + "grad_norm": 0.5705057382583618, + "learning_rate": 9.41021456641356e-06, + "loss": 0.1113739013671875, + "step": 9390 + }, + { + "epoch": 0.63478437204272, + "grad_norm": 0.5768349170684814, + "learning_rate": 9.407152833295373e-06, + "loss": 0.0552978515625, + "step": 9391 + }, + { + "epoch": 0.6348519670136542, + "grad_norm": 1.5141663551330566, + "learning_rate": 9.404091370793911e-06, + "loss": 0.205230712890625, + "step": 9392 + }, + { + "epoch": 0.6349195619845883, + "grad_norm": 0.32279929518699646, + "learning_rate": 9.401030179057314e-06, + "loss": 0.0435943603515625, + "step": 9393 + }, + { + "epoch": 0.6349871569555225, + "grad_norm": 0.5190682411193848, + "learning_rate": 9.397969258233692e-06, + "loss": 0.0937337875366211, + "step": 9394 + }, + { + "epoch": 0.6350547519264567, + "grad_norm": 1.7695629596710205, + "learning_rate": 9.394908608471156e-06, + "loss": 0.14557647705078125, + "step": 9395 + }, + { + "epoch": 0.6351223468973909, + "grad_norm": 0.29288676381111145, + "learning_rate": 9.391848229917807e-06, + "loss": 0.034770965576171875, + "step": 9396 + }, + { + "epoch": 0.635189941868325, + "grad_norm": 0.3521639108657837, + "learning_rate": 9.388788122721707e-06, + "loss": 0.05605316162109375, + "step": 9397 + }, + { + "epoch": 0.6352575368392591, + "grad_norm": 1.033281922340393, + "learning_rate": 9.385728287030938e-06, + "loss": 0.1656951904296875, + "step": 9398 + }, + { + "epoch": 0.6353251318101933, + "grad_norm": 0.5420757532119751, + "learning_rate": 9.382668722993547e-06, + "loss": 0.10540771484375, + "step": 9399 + }, + { + "epoch": 0.6353927267811275, + "grad_norm": 0.7087738513946533, + "learning_rate": 9.379609430757583e-06, + "loss": 0.173309326171875, + "step": 9400 + }, + { + "epoch": 0.6354603217520617, + "grad_norm": 1.0989062786102295, + "learning_rate": 9.376550410471061e-06, + "loss": 0.1847381591796875, + "step": 9401 + }, + { + "epoch": 0.6355279167229958, + "grad_norm": 0.2187226414680481, + "learning_rate": 9.373491662282008e-06, + "loss": 0.03353118896484375, + "step": 9402 + }, + { + "epoch": 0.63559551169393, + "grad_norm": 1.3334726095199585, + "learning_rate": 9.370433186338413e-06, + "loss": 0.199676513671875, + "step": 9403 + }, + { + "epoch": 0.6356631066648641, + "grad_norm": 1.2561780214309692, + "learning_rate": 9.367374982788275e-06, + "loss": 0.202728271484375, + "step": 9404 + }, + { + "epoch": 0.6357307016357983, + "grad_norm": 1.0227771997451782, + "learning_rate": 9.364317051779559e-06, + "loss": 0.15087890625, + "step": 9405 + }, + { + "epoch": 0.6357982966067325, + "grad_norm": 0.8158296346664429, + "learning_rate": 9.36125939346024e-06, + "loss": 0.14399337768554688, + "step": 9406 + }, + { + "epoch": 0.6358658915776666, + "grad_norm": 1.2301439046859741, + "learning_rate": 9.358202007978251e-06, + "loss": 0.241973876953125, + "step": 9407 + }, + { + "epoch": 0.6359334865486008, + "grad_norm": 1.4614641666412354, + "learning_rate": 9.355144895481538e-06, + "loss": 0.20410537719726562, + "step": 9408 + }, + { + "epoch": 0.6360010815195349, + "grad_norm": 1.1213423013687134, + "learning_rate": 9.352088056118024e-06, + "loss": 0.1507720947265625, + "step": 9409 + }, + { + "epoch": 0.636068676490469, + "grad_norm": 0.30365845561027527, + "learning_rate": 9.349031490035605e-06, + "loss": 0.05968475341796875, + "step": 9410 + }, + { + "epoch": 0.6361362714614033, + "grad_norm": 1.6135042905807495, + "learning_rate": 9.34597519738219e-06, + "loss": 0.1312103271484375, + "step": 9411 + }, + { + "epoch": 0.6362038664323374, + "grad_norm": 1.263267993927002, + "learning_rate": 9.342919178305655e-06, + "loss": 0.20416259765625, + "step": 9412 + }, + { + "epoch": 0.6362714614032716, + "grad_norm": 0.30630841851234436, + "learning_rate": 9.339863432953878e-06, + "loss": 0.05327415466308594, + "step": 9413 + }, + { + "epoch": 0.6363390563742057, + "grad_norm": 0.2416200488805771, + "learning_rate": 9.336807961474699e-06, + "loss": 0.04792022705078125, + "step": 9414 + }, + { + "epoch": 0.63640665134514, + "grad_norm": 0.25614503026008606, + "learning_rate": 9.333752764015976e-06, + "loss": 0.0366058349609375, + "step": 9415 + }, + { + "epoch": 0.6364742463160741, + "grad_norm": 0.32642266154289246, + "learning_rate": 9.330697840725527e-06, + "loss": 0.07495880126953125, + "step": 9416 + }, + { + "epoch": 0.6365418412870082, + "grad_norm": 0.7360308766365051, + "learning_rate": 9.32764319175118e-06, + "loss": 0.1026153564453125, + "step": 9417 + }, + { + "epoch": 0.6366094362579424, + "grad_norm": 0.42098623514175415, + "learning_rate": 9.324588817240726e-06, + "loss": 0.094085693359375, + "step": 9418 + }, + { + "epoch": 0.6366770312288765, + "grad_norm": 0.6069695949554443, + "learning_rate": 9.321534717341966e-06, + "loss": 0.145538330078125, + "step": 9419 + }, + { + "epoch": 0.6367446261998108, + "grad_norm": 0.33791300654411316, + "learning_rate": 9.318480892202667e-06, + "loss": 0.053661346435546875, + "step": 9420 + }, + { + "epoch": 0.6368122211707449, + "grad_norm": 0.336225688457489, + "learning_rate": 9.315427341970592e-06, + "loss": 0.0667724609375, + "step": 9421 + }, + { + "epoch": 0.6368798161416791, + "grad_norm": 1.0249632596969604, + "learning_rate": 9.312374066793501e-06, + "loss": 0.21490478515625, + "step": 9422 + }, + { + "epoch": 0.6369474111126132, + "grad_norm": 0.309863418340683, + "learning_rate": 9.309321066819119e-06, + "loss": 0.03887939453125, + "step": 9423 + }, + { + "epoch": 0.6370150060835473, + "grad_norm": 0.7691817283630371, + "learning_rate": 9.306268342195173e-06, + "loss": 0.132537841796875, + "step": 9424 + }, + { + "epoch": 0.6370826010544816, + "grad_norm": 0.3921416699886322, + "learning_rate": 9.303215893069373e-06, + "loss": 0.0663299560546875, + "step": 9425 + }, + { + "epoch": 0.6371501960254157, + "grad_norm": 1.0301109552383423, + "learning_rate": 9.30016371958942e-06, + "loss": 0.16209793090820312, + "step": 9426 + }, + { + "epoch": 0.6372177909963499, + "grad_norm": 1.1256886720657349, + "learning_rate": 9.297111821902989e-06, + "loss": 0.229461669921875, + "step": 9427 + }, + { + "epoch": 0.637285385967284, + "grad_norm": 0.24916936457157135, + "learning_rate": 9.294060200157758e-06, + "loss": 0.0424041748046875, + "step": 9428 + }, + { + "epoch": 0.6373529809382182, + "grad_norm": 0.6077307462692261, + "learning_rate": 9.291008854501376e-06, + "loss": 0.118408203125, + "step": 9429 + }, + { + "epoch": 0.6374205759091524, + "grad_norm": 0.49132493138313293, + "learning_rate": 9.287957785081493e-06, + "loss": 0.12579345703125, + "step": 9430 + }, + { + "epoch": 0.6374881708800865, + "grad_norm": 0.5463690757751465, + "learning_rate": 9.284906992045738e-06, + "loss": 0.129547119140625, + "step": 9431 + }, + { + "epoch": 0.6375557658510207, + "grad_norm": 0.5662960410118103, + "learning_rate": 9.281856475541716e-06, + "loss": 0.04892730712890625, + "step": 9432 + }, + { + "epoch": 0.6376233608219548, + "grad_norm": 0.8007811307907104, + "learning_rate": 9.278806235717043e-06, + "loss": 0.11557769775390625, + "step": 9433 + }, + { + "epoch": 0.637690955792889, + "grad_norm": 0.2666448652744293, + "learning_rate": 9.2757562727193e-06, + "loss": 0.0452728271484375, + "step": 9434 + }, + { + "epoch": 0.6377585507638232, + "grad_norm": 0.7057787179946899, + "learning_rate": 9.272706586696075e-06, + "loss": 0.15975189208984375, + "step": 9435 + }, + { + "epoch": 0.6378261457347574, + "grad_norm": 0.6042490005493164, + "learning_rate": 9.269657177794915e-06, + "loss": 0.119293212890625, + "step": 9436 + }, + { + "epoch": 0.6378937407056915, + "grad_norm": 1.2659889459609985, + "learning_rate": 9.266608046163383e-06, + "loss": 0.21221923828125, + "step": 9437 + }, + { + "epoch": 0.6379613356766256, + "grad_norm": 0.837741494178772, + "learning_rate": 9.263559191949003e-06, + "loss": 0.1365509033203125, + "step": 9438 + }, + { + "epoch": 0.6380289306475598, + "grad_norm": 0.32821211218833923, + "learning_rate": 9.260510615299313e-06, + "loss": 0.0508880615234375, + "step": 9439 + }, + { + "epoch": 0.638096525618494, + "grad_norm": 0.7478127479553223, + "learning_rate": 9.257462316361803e-06, + "loss": 0.188690185546875, + "step": 9440 + }, + { + "epoch": 0.6381641205894282, + "grad_norm": 0.6981348991394043, + "learning_rate": 9.254414295283985e-06, + "loss": 0.12250518798828125, + "step": 9441 + }, + { + "epoch": 0.6382317155603623, + "grad_norm": 1.6818745136260986, + "learning_rate": 9.251366552213331e-06, + "loss": 0.157928466796875, + "step": 9442 + }, + { + "epoch": 0.6382993105312965, + "grad_norm": 0.3743435740470886, + "learning_rate": 9.248319087297319e-06, + "loss": 0.08355712890625, + "step": 9443 + }, + { + "epoch": 0.6383669055022306, + "grad_norm": 0.4868007302284241, + "learning_rate": 9.245271900683396e-06, + "loss": 0.0828399658203125, + "step": 9444 + }, + { + "epoch": 0.6384345004731647, + "grad_norm": 0.18240199983119965, + "learning_rate": 9.242224992519004e-06, + "loss": 0.027460098266601562, + "step": 9445 + }, + { + "epoch": 0.638502095444099, + "grad_norm": 0.5352561473846436, + "learning_rate": 9.239178362951581e-06, + "loss": 0.11745452880859375, + "step": 9446 + }, + { + "epoch": 0.6385696904150331, + "grad_norm": 0.3703548014163971, + "learning_rate": 9.236132012128528e-06, + "loss": 0.0714111328125, + "step": 9447 + }, + { + "epoch": 0.6386372853859673, + "grad_norm": 0.6646475195884705, + "learning_rate": 9.233085940197258e-06, + "loss": 0.137786865234375, + "step": 9448 + }, + { + "epoch": 0.6387048803569014, + "grad_norm": 0.7137295007705688, + "learning_rate": 9.23004014730515e-06, + "loss": 0.18377685546875, + "step": 9449 + }, + { + "epoch": 0.6387724753278357, + "grad_norm": 0.8103562593460083, + "learning_rate": 9.226994633599586e-06, + "loss": 0.191070556640625, + "step": 9450 + }, + { + "epoch": 0.6388400702987698, + "grad_norm": 0.4527226388454437, + "learning_rate": 9.22394939922792e-06, + "loss": 0.1119537353515625, + "step": 9451 + }, + { + "epoch": 0.6389076652697039, + "grad_norm": 0.5680040121078491, + "learning_rate": 9.220904444337508e-06, + "loss": 0.12146759033203125, + "step": 9452 + }, + { + "epoch": 0.6389752602406381, + "grad_norm": 1.0448416471481323, + "learning_rate": 9.217859769075673e-06, + "loss": 0.19647216796875, + "step": 9453 + }, + { + "epoch": 0.6390428552115722, + "grad_norm": 1.360230803489685, + "learning_rate": 9.214815373589744e-06, + "loss": 0.187286376953125, + "step": 9454 + }, + { + "epoch": 0.6391104501825065, + "grad_norm": 0.22817714512348175, + "learning_rate": 9.21177125802703e-06, + "loss": 0.040889739990234375, + "step": 9455 + }, + { + "epoch": 0.6391780451534406, + "grad_norm": 0.4533036947250366, + "learning_rate": 9.208727422534811e-06, + "loss": 0.08949661254882812, + "step": 9456 + }, + { + "epoch": 0.6392456401243748, + "grad_norm": 0.8155308961868286, + "learning_rate": 9.20568386726038e-06, + "loss": 0.10479736328125, + "step": 9457 + }, + { + "epoch": 0.6393132350953089, + "grad_norm": 0.8552558422088623, + "learning_rate": 9.202640592350992e-06, + "loss": 0.169158935546875, + "step": 9458 + }, + { + "epoch": 0.639380830066243, + "grad_norm": 0.15079262852668762, + "learning_rate": 9.199597597953915e-06, + "loss": 0.025636672973632812, + "step": 9459 + }, + { + "epoch": 0.6394484250371772, + "grad_norm": 0.24780309200286865, + "learning_rate": 9.196554884216369e-06, + "loss": 0.031795501708984375, + "step": 9460 + }, + { + "epoch": 0.6395160200081114, + "grad_norm": 0.7014284729957581, + "learning_rate": 9.193512451285592e-06, + "loss": 0.1005859375, + "step": 9461 + }, + { + "epoch": 0.6395836149790456, + "grad_norm": 0.16904127597808838, + "learning_rate": 9.190470299308793e-06, + "loss": 0.023995399475097656, + "step": 9462 + }, + { + "epoch": 0.6396512099499797, + "grad_norm": 0.25601136684417725, + "learning_rate": 9.187428428433174e-06, + "loss": 0.0570068359375, + "step": 9463 + }, + { + "epoch": 0.6397188049209139, + "grad_norm": 1.5022307634353638, + "learning_rate": 9.184386838805909e-06, + "loss": 0.15127182006835938, + "step": 9464 + }, + { + "epoch": 0.639786399891848, + "grad_norm": 1.0458430051803589, + "learning_rate": 9.181345530574185e-06, + "loss": 0.15600204467773438, + "step": 9465 + }, + { + "epoch": 0.6398539948627822, + "grad_norm": 0.5901976823806763, + "learning_rate": 9.178304503885142e-06, + "loss": 0.16426849365234375, + "step": 9466 + }, + { + "epoch": 0.6399215898337164, + "grad_norm": 0.8735760450363159, + "learning_rate": 9.175263758885932e-06, + "loss": 0.1620025634765625, + "step": 9467 + }, + { + "epoch": 0.6399891848046505, + "grad_norm": 0.7320772409439087, + "learning_rate": 9.172223295723691e-06, + "loss": 0.1287384033203125, + "step": 9468 + }, + { + "epoch": 0.6400567797755847, + "grad_norm": 0.332114577293396, + "learning_rate": 9.169183114545523e-06, + "loss": 0.05304431915283203, + "step": 9469 + }, + { + "epoch": 0.6401243747465188, + "grad_norm": 0.34628599882125854, + "learning_rate": 9.16614321549854e-06, + "loss": 0.07439804077148438, + "step": 9470 + }, + { + "epoch": 0.6401919697174531, + "grad_norm": 1.2465542554855347, + "learning_rate": 9.163103598729825e-06, + "loss": 0.13956451416015625, + "step": 9471 + }, + { + "epoch": 0.6402595646883872, + "grad_norm": 0.9506198763847351, + "learning_rate": 9.160064264386466e-06, + "loss": 0.11042404174804688, + "step": 9472 + }, + { + "epoch": 0.6403271596593213, + "grad_norm": 0.43449535965919495, + "learning_rate": 9.157025212615506e-06, + "loss": 0.084320068359375, + "step": 9473 + }, + { + "epoch": 0.6403947546302555, + "grad_norm": 0.6707538962364197, + "learning_rate": 9.153986443564011e-06, + "loss": 0.1355438232421875, + "step": 9474 + }, + { + "epoch": 0.6404623496011896, + "grad_norm": 0.2551667094230652, + "learning_rate": 9.150947957379002e-06, + "loss": 0.038234710693359375, + "step": 9475 + }, + { + "epoch": 0.6405299445721239, + "grad_norm": 1.055281639099121, + "learning_rate": 9.147909754207512e-06, + "loss": 0.1614227294921875, + "step": 9476 + }, + { + "epoch": 0.640597539543058, + "grad_norm": 0.4845237135887146, + "learning_rate": 9.144871834196536e-06, + "loss": 0.09074783325195312, + "step": 9477 + }, + { + "epoch": 0.6406651345139922, + "grad_norm": 1.2806742191314697, + "learning_rate": 9.141834197493078e-06, + "loss": 0.2016143798828125, + "step": 9478 + }, + { + "epoch": 0.6407327294849263, + "grad_norm": 0.41422557830810547, + "learning_rate": 9.138796844244112e-06, + "loss": 0.07607269287109375, + "step": 9479 + }, + { + "epoch": 0.6408003244558604, + "grad_norm": 0.422597736120224, + "learning_rate": 9.1357597745966e-06, + "loss": 0.0621490478515625, + "step": 9480 + }, + { + "epoch": 0.6408679194267947, + "grad_norm": 0.2924991548061371, + "learning_rate": 9.132722988697507e-06, + "loss": 0.059234619140625, + "step": 9481 + }, + { + "epoch": 0.6409355143977288, + "grad_norm": 0.8818486332893372, + "learning_rate": 9.129686486693758e-06, + "loss": 0.1533050537109375, + "step": 9482 + }, + { + "epoch": 0.641003109368663, + "grad_norm": 0.5191968679428101, + "learning_rate": 9.126650268732287e-06, + "loss": 0.11296844482421875, + "step": 9483 + }, + { + "epoch": 0.6410707043395971, + "grad_norm": 0.4897157549858093, + "learning_rate": 9.123614334959997e-06, + "loss": 0.086029052734375, + "step": 9484 + }, + { + "epoch": 0.6411382993105313, + "grad_norm": 0.6756875514984131, + "learning_rate": 9.120578685523798e-06, + "loss": 0.1513671875, + "step": 9485 + }, + { + "epoch": 0.6412058942814655, + "grad_norm": 1.2936758995056152, + "learning_rate": 9.117543320570559e-06, + "loss": 0.153839111328125, + "step": 9486 + }, + { + "epoch": 0.6412734892523996, + "grad_norm": 0.7481411695480347, + "learning_rate": 9.114508240247162e-06, + "loss": 0.12908935546875, + "step": 9487 + }, + { + "epoch": 0.6413410842233338, + "grad_norm": 0.2599276900291443, + "learning_rate": 9.111473444700453e-06, + "loss": 0.0435791015625, + "step": 9488 + }, + { + "epoch": 0.6414086791942679, + "grad_norm": 1.3878167867660522, + "learning_rate": 9.108438934077287e-06, + "loss": 0.213592529296875, + "step": 9489 + }, + { + "epoch": 0.6414762741652021, + "grad_norm": 0.5896070599555969, + "learning_rate": 9.10540470852448e-06, + "loss": 0.118560791015625, + "step": 9490 + }, + { + "epoch": 0.6415438691361363, + "grad_norm": 0.4592714309692383, + "learning_rate": 9.102370768188848e-06, + "loss": 0.1053466796875, + "step": 9491 + }, + { + "epoch": 0.6416114641070705, + "grad_norm": 0.2053709328174591, + "learning_rate": 9.099337113217203e-06, + "loss": 0.026698589324951172, + "step": 9492 + }, + { + "epoch": 0.6416790590780046, + "grad_norm": 0.7547297477722168, + "learning_rate": 9.096303743756315e-06, + "loss": 0.12601470947265625, + "step": 9493 + }, + { + "epoch": 0.6417466540489387, + "grad_norm": 0.5625900626182556, + "learning_rate": 9.093270659952974e-06, + "loss": 0.10816192626953125, + "step": 9494 + }, + { + "epoch": 0.6418142490198729, + "grad_norm": 0.19782811403274536, + "learning_rate": 9.090237861953927e-06, + "loss": 0.024383544921875, + "step": 9495 + }, + { + "epoch": 0.641881843990807, + "grad_norm": 0.3934776186943054, + "learning_rate": 9.087205349905926e-06, + "loss": 0.0655670166015625, + "step": 9496 + }, + { + "epoch": 0.6419494389617413, + "grad_norm": 0.3051411211490631, + "learning_rate": 9.0841731239557e-06, + "loss": 0.04703521728515625, + "step": 9497 + }, + { + "epoch": 0.6420170339326754, + "grad_norm": 0.4814693331718445, + "learning_rate": 9.081141184249973e-06, + "loss": 0.0658111572265625, + "step": 9498 + }, + { + "epoch": 0.6420846289036096, + "grad_norm": 0.32624638080596924, + "learning_rate": 9.07810953093544e-06, + "loss": 0.04107666015625, + "step": 9499 + }, + { + "epoch": 0.6421522238745437, + "grad_norm": 0.47550880908966064, + "learning_rate": 9.075078164158799e-06, + "loss": 0.0823211669921875, + "step": 9500 + }, + { + "epoch": 0.6422198188454779, + "grad_norm": 0.28029486536979675, + "learning_rate": 9.072047084066727e-06, + "loss": 0.029998779296875, + "step": 9501 + }, + { + "epoch": 0.6422874138164121, + "grad_norm": 0.22498725354671478, + "learning_rate": 9.069016290805873e-06, + "loss": 0.03301429748535156, + "step": 9502 + }, + { + "epoch": 0.6423550087873462, + "grad_norm": 0.3203832507133484, + "learning_rate": 9.0659857845229e-06, + "loss": 0.0608978271484375, + "step": 9503 + }, + { + "epoch": 0.6424226037582804, + "grad_norm": 0.7056114077568054, + "learning_rate": 9.062955565364436e-06, + "loss": 0.1266326904296875, + "step": 9504 + }, + { + "epoch": 0.6424901987292145, + "grad_norm": 0.7032767534255981, + "learning_rate": 9.059925633477108e-06, + "loss": 0.1103057861328125, + "step": 9505 + }, + { + "epoch": 0.6425577937001488, + "grad_norm": 0.49507543444633484, + "learning_rate": 9.056895989007513e-06, + "loss": 0.08043670654296875, + "step": 9506 + }, + { + "epoch": 0.6426253886710829, + "grad_norm": 0.30571067333221436, + "learning_rate": 9.053866632102254e-06, + "loss": 0.05709075927734375, + "step": 9507 + }, + { + "epoch": 0.642692983642017, + "grad_norm": 0.43525731563568115, + "learning_rate": 9.050837562907903e-06, + "loss": 0.08080101013183594, + "step": 9508 + }, + { + "epoch": 0.6427605786129512, + "grad_norm": 0.490715354681015, + "learning_rate": 9.047808781571034e-06, + "loss": 0.0982208251953125, + "step": 9509 + }, + { + "epoch": 0.6428281735838853, + "grad_norm": 0.9567229747772217, + "learning_rate": 9.044780288238186e-06, + "loss": 0.15055084228515625, + "step": 9510 + }, + { + "epoch": 0.6428957685548196, + "grad_norm": 1.0065991878509521, + "learning_rate": 9.04175208305591e-06, + "loss": 0.157745361328125, + "step": 9511 + }, + { + "epoch": 0.6429633635257537, + "grad_norm": 0.5909623503684998, + "learning_rate": 9.038724166170713e-06, + "loss": 0.1042327880859375, + "step": 9512 + }, + { + "epoch": 0.6430309584966879, + "grad_norm": 0.21859963238239288, + "learning_rate": 9.035696537729119e-06, + "loss": 0.030670166015625, + "step": 9513 + }, + { + "epoch": 0.643098553467622, + "grad_norm": 0.5924269556999207, + "learning_rate": 9.03266919787762e-06, + "loss": 0.1187591552734375, + "step": 9514 + }, + { + "epoch": 0.6431661484385561, + "grad_norm": 1.001357913017273, + "learning_rate": 9.029642146762692e-06, + "loss": 0.125274658203125, + "step": 9515 + }, + { + "epoch": 0.6432337434094904, + "grad_norm": 1.8766803741455078, + "learning_rate": 9.026615384530807e-06, + "loss": 0.2421875, + "step": 9516 + }, + { + "epoch": 0.6433013383804245, + "grad_norm": 0.6062009334564209, + "learning_rate": 9.023588911328415e-06, + "loss": 0.11652755737304688, + "step": 9517 + }, + { + "epoch": 0.6433689333513587, + "grad_norm": 0.9835096001625061, + "learning_rate": 9.020562727301966e-06, + "loss": 0.11869049072265625, + "step": 9518 + }, + { + "epoch": 0.6434365283222928, + "grad_norm": 0.3204514980316162, + "learning_rate": 9.017536832597869e-06, + "loss": 0.0811004638671875, + "step": 9519 + }, + { + "epoch": 0.643504123293227, + "grad_norm": 0.7950150966644287, + "learning_rate": 9.01451122736255e-06, + "loss": 0.12017822265625, + "step": 9520 + }, + { + "epoch": 0.6435717182641612, + "grad_norm": 0.3011973202228546, + "learning_rate": 9.011485911742396e-06, + "loss": 0.04262542724609375, + "step": 9521 + }, + { + "epoch": 0.6436393132350953, + "grad_norm": 0.21209125220775604, + "learning_rate": 9.008460885883805e-06, + "loss": 0.04320526123046875, + "step": 9522 + }, + { + "epoch": 0.6437069082060295, + "grad_norm": 0.7509862184524536, + "learning_rate": 9.00543614993313e-06, + "loss": 0.1281890869140625, + "step": 9523 + }, + { + "epoch": 0.6437745031769636, + "grad_norm": 0.9518669843673706, + "learning_rate": 9.002411704036739e-06, + "loss": 0.173980712890625, + "step": 9524 + }, + { + "epoch": 0.6438420981478978, + "grad_norm": 0.42346301674842834, + "learning_rate": 8.999387548340966e-06, + "loss": 0.049678802490234375, + "step": 9525 + }, + { + "epoch": 0.643909693118832, + "grad_norm": 0.5530701279640198, + "learning_rate": 8.996363682992137e-06, + "loss": 0.09552001953125, + "step": 9526 + }, + { + "epoch": 0.6439772880897662, + "grad_norm": 1.7164998054504395, + "learning_rate": 8.993340108136577e-06, + "loss": 0.264251708984375, + "step": 9527 + }, + { + "epoch": 0.6440448830607003, + "grad_norm": 0.892719566822052, + "learning_rate": 8.990316823920569e-06, + "loss": 0.1413726806640625, + "step": 9528 + }, + { + "epoch": 0.6441124780316344, + "grad_norm": 1.3004255294799805, + "learning_rate": 8.987293830490411e-06, + "loss": 0.195404052734375, + "step": 9529 + }, + { + "epoch": 0.6441800730025686, + "grad_norm": 0.3340810537338257, + "learning_rate": 8.984271127992367e-06, + "loss": 0.04886627197265625, + "step": 9530 + }, + { + "epoch": 0.6442476679735027, + "grad_norm": 0.2374494969844818, + "learning_rate": 8.981248716572705e-06, + "loss": 0.0389251708984375, + "step": 9531 + }, + { + "epoch": 0.644315262944437, + "grad_norm": 0.4876857101917267, + "learning_rate": 8.978226596377652e-06, + "loss": 0.097198486328125, + "step": 9532 + }, + { + "epoch": 0.6443828579153711, + "grad_norm": 0.8623921871185303, + "learning_rate": 8.97520476755345e-06, + "loss": 0.153350830078125, + "step": 9533 + }, + { + "epoch": 0.6444504528863053, + "grad_norm": 0.1987919807434082, + "learning_rate": 8.972183230246303e-06, + "loss": 0.035800933837890625, + "step": 9534 + }, + { + "epoch": 0.6445180478572394, + "grad_norm": 0.8925960063934326, + "learning_rate": 8.969161984602428e-06, + "loss": 0.12539291381835938, + "step": 9535 + }, + { + "epoch": 0.6445856428281735, + "grad_norm": 0.2887953221797943, + "learning_rate": 8.966141030767995e-06, + "loss": 0.0452423095703125, + "step": 9536 + }, + { + "epoch": 0.6446532377991078, + "grad_norm": 0.4386466443538666, + "learning_rate": 8.963120368889183e-06, + "loss": 0.0736083984375, + "step": 9537 + }, + { + "epoch": 0.6447208327700419, + "grad_norm": 0.7011508941650391, + "learning_rate": 8.960099999112156e-06, + "loss": 0.157318115234375, + "step": 9538 + }, + { + "epoch": 0.6447884277409761, + "grad_norm": 0.7346072793006897, + "learning_rate": 8.957079921583046e-06, + "loss": 0.11945343017578125, + "step": 9539 + }, + { + "epoch": 0.6448560227119102, + "grad_norm": 0.9096639156341553, + "learning_rate": 8.954060136447995e-06, + "loss": 0.174530029296875, + "step": 9540 + }, + { + "epoch": 0.6449236176828443, + "grad_norm": 0.4634506106376648, + "learning_rate": 8.95104064385311e-06, + "loss": 0.07964706420898438, + "step": 9541 + }, + { + "epoch": 0.6449912126537786, + "grad_norm": 0.7721431851387024, + "learning_rate": 8.9480214439445e-06, + "loss": 0.127532958984375, + "step": 9542 + }, + { + "epoch": 0.6450588076247127, + "grad_norm": 0.5846282839775085, + "learning_rate": 8.945002536868242e-06, + "loss": 0.0637664794921875, + "step": 9543 + }, + { + "epoch": 0.6451264025956469, + "grad_norm": 0.44316986203193665, + "learning_rate": 8.941983922770427e-06, + "loss": 0.079925537109375, + "step": 9544 + }, + { + "epoch": 0.645193997566581, + "grad_norm": 0.3219142556190491, + "learning_rate": 8.938965601797098e-06, + "loss": 0.045139312744140625, + "step": 9545 + }, + { + "epoch": 0.6452615925375152, + "grad_norm": 0.45661094784736633, + "learning_rate": 8.935947574094309e-06, + "loss": 0.088623046875, + "step": 9546 + }, + { + "epoch": 0.6453291875084494, + "grad_norm": 1.3322687149047852, + "learning_rate": 8.932929839808085e-06, + "loss": 0.14517974853515625, + "step": 9547 + }, + { + "epoch": 0.6453967824793835, + "grad_norm": 0.3997787535190582, + "learning_rate": 8.92991239908445e-06, + "loss": 0.08126068115234375, + "step": 9548 + }, + { + "epoch": 0.6454643774503177, + "grad_norm": 0.29588770866394043, + "learning_rate": 8.926895252069404e-06, + "loss": 0.05212211608886719, + "step": 9549 + }, + { + "epoch": 0.6455319724212518, + "grad_norm": 0.9016171097755432, + "learning_rate": 8.923878398908927e-06, + "loss": 0.163848876953125, + "step": 9550 + }, + { + "epoch": 0.645599567392186, + "grad_norm": 0.8458407521247864, + "learning_rate": 8.920861839749007e-06, + "loss": 0.21575927734375, + "step": 9551 + }, + { + "epoch": 0.6456671623631202, + "grad_norm": 0.25391900539398193, + "learning_rate": 8.917845574735593e-06, + "loss": 0.0375518798828125, + "step": 9552 + }, + { + "epoch": 0.6457347573340544, + "grad_norm": 0.7911462187767029, + "learning_rate": 8.914829604014637e-06, + "loss": 0.1282958984375, + "step": 9553 + }, + { + "epoch": 0.6458023523049885, + "grad_norm": 0.9931989312171936, + "learning_rate": 8.911813927732062e-06, + "loss": 0.1751251220703125, + "step": 9554 + }, + { + "epoch": 0.6458699472759226, + "grad_norm": 0.5816311836242676, + "learning_rate": 8.908798546033799e-06, + "loss": 0.112762451171875, + "step": 9555 + }, + { + "epoch": 0.6459375422468568, + "grad_norm": 0.3532739281654358, + "learning_rate": 8.905783459065739e-06, + "loss": 0.062347412109375, + "step": 9556 + }, + { + "epoch": 0.646005137217791, + "grad_norm": 0.6161189675331116, + "learning_rate": 8.90276866697378e-06, + "loss": 0.10648345947265625, + "step": 9557 + }, + { + "epoch": 0.6460727321887252, + "grad_norm": 0.29062962532043457, + "learning_rate": 8.899754169903782e-06, + "loss": 0.057064056396484375, + "step": 9558 + }, + { + "epoch": 0.6461403271596593, + "grad_norm": 0.5386633276939392, + "learning_rate": 8.896739968001621e-06, + "loss": 0.1284332275390625, + "step": 9559 + }, + { + "epoch": 0.6462079221305935, + "grad_norm": 0.3498024642467499, + "learning_rate": 8.893726061413138e-06, + "loss": 0.02480316162109375, + "step": 9560 + }, + { + "epoch": 0.6462755171015276, + "grad_norm": 0.3556305170059204, + "learning_rate": 8.890712450284155e-06, + "loss": 0.053409576416015625, + "step": 9561 + }, + { + "epoch": 0.6463431120724618, + "grad_norm": 0.9236648678779602, + "learning_rate": 8.887699134760503e-06, + "loss": 0.1544189453125, + "step": 9562 + }, + { + "epoch": 0.646410707043396, + "grad_norm": 0.9408692121505737, + "learning_rate": 8.884686114987973e-06, + "loss": 0.1914520263671875, + "step": 9563 + }, + { + "epoch": 0.6464783020143301, + "grad_norm": 0.794963002204895, + "learning_rate": 8.881673391112365e-06, + "loss": 0.1406707763671875, + "step": 9564 + }, + { + "epoch": 0.6465458969852643, + "grad_norm": 0.24115656316280365, + "learning_rate": 8.878660963279447e-06, + "loss": 0.045108795166015625, + "step": 9565 + }, + { + "epoch": 0.6466134919561984, + "grad_norm": 0.3762570917606354, + "learning_rate": 8.875648831634977e-06, + "loss": 0.047382354736328125, + "step": 9566 + }, + { + "epoch": 0.6466810869271327, + "grad_norm": 1.112441897392273, + "learning_rate": 8.872636996324704e-06, + "loss": 0.206573486328125, + "step": 9567 + }, + { + "epoch": 0.6467486818980668, + "grad_norm": 0.7814299464225769, + "learning_rate": 8.869625457494362e-06, + "loss": 0.11766815185546875, + "step": 9568 + }, + { + "epoch": 0.6468162768690009, + "grad_norm": 0.7682585716247559, + "learning_rate": 8.866614215289662e-06, + "loss": 0.151458740234375, + "step": 9569 + }, + { + "epoch": 0.6468838718399351, + "grad_norm": 0.7587631940841675, + "learning_rate": 8.863603269856312e-06, + "loss": 0.1610565185546875, + "step": 9570 + }, + { + "epoch": 0.6469514668108692, + "grad_norm": 0.2130032181739807, + "learning_rate": 8.860592621339998e-06, + "loss": 0.033077239990234375, + "step": 9571 + }, + { + "epoch": 0.6470190617818035, + "grad_norm": 0.5554805994033813, + "learning_rate": 8.857582269886387e-06, + "loss": 0.09023284912109375, + "step": 9572 + }, + { + "epoch": 0.6470866567527376, + "grad_norm": 1.2273210287094116, + "learning_rate": 8.854572215641154e-06, + "loss": 0.217559814453125, + "step": 9573 + }, + { + "epoch": 0.6471542517236718, + "grad_norm": 0.9429629445075989, + "learning_rate": 8.851562458749928e-06, + "loss": 0.2103271484375, + "step": 9574 + }, + { + "epoch": 0.6472218466946059, + "grad_norm": 0.3739013373851776, + "learning_rate": 8.84855299935835e-06, + "loss": 0.057952880859375, + "step": 9575 + }, + { + "epoch": 0.64728944166554, + "grad_norm": 1.0548979043960571, + "learning_rate": 8.845543837612031e-06, + "loss": 0.13177490234375, + "step": 9576 + }, + { + "epoch": 0.6473570366364743, + "grad_norm": 1.1320693492889404, + "learning_rate": 8.84253497365658e-06, + "loss": 0.1168670654296875, + "step": 9577 + }, + { + "epoch": 0.6474246316074084, + "grad_norm": 0.4260792136192322, + "learning_rate": 8.839526407637576e-06, + "loss": 0.059780120849609375, + "step": 9578 + }, + { + "epoch": 0.6474922265783426, + "grad_norm": 0.5898290872573853, + "learning_rate": 8.836518139700597e-06, + "loss": 0.12255859375, + "step": 9579 + }, + { + "epoch": 0.6475598215492767, + "grad_norm": 1.2125498056411743, + "learning_rate": 8.833510169991198e-06, + "loss": 0.187652587890625, + "step": 9580 + }, + { + "epoch": 0.6476274165202109, + "grad_norm": 0.40193021297454834, + "learning_rate": 8.830502498654932e-06, + "loss": 0.0489349365234375, + "step": 9581 + }, + { + "epoch": 0.6476950114911451, + "grad_norm": 0.30637282133102417, + "learning_rate": 8.827495125837316e-06, + "loss": 0.04024505615234375, + "step": 9582 + }, + { + "epoch": 0.6477626064620792, + "grad_norm": 0.6933283805847168, + "learning_rate": 8.824488051683877e-06, + "loss": 0.114990234375, + "step": 9583 + }, + { + "epoch": 0.6478302014330134, + "grad_norm": 1.5279723405838013, + "learning_rate": 8.821481276340111e-06, + "loss": 0.19464111328125, + "step": 9584 + }, + { + "epoch": 0.6478977964039475, + "grad_norm": 0.5258044600486755, + "learning_rate": 8.818474799951504e-06, + "loss": 0.0845947265625, + "step": 9585 + }, + { + "epoch": 0.6479653913748817, + "grad_norm": 0.6748111844062805, + "learning_rate": 8.815468622663531e-06, + "loss": 0.16681671142578125, + "step": 9586 + }, + { + "epoch": 0.6480329863458159, + "grad_norm": 0.8324787616729736, + "learning_rate": 8.812462744621641e-06, + "loss": 0.11557388305664062, + "step": 9587 + }, + { + "epoch": 0.6481005813167501, + "grad_norm": 0.6300735473632812, + "learning_rate": 8.809457165971288e-06, + "loss": 0.10375213623046875, + "step": 9588 + }, + { + "epoch": 0.6481681762876842, + "grad_norm": 0.6849495768547058, + "learning_rate": 8.806451886857892e-06, + "loss": 0.09326171875, + "step": 9589 + }, + { + "epoch": 0.6482357712586183, + "grad_norm": 1.0091854333877563, + "learning_rate": 8.803446907426878e-06, + "loss": 0.1624755859375, + "step": 9590 + }, + { + "epoch": 0.6483033662295525, + "grad_norm": 0.4362211525440216, + "learning_rate": 8.80044222782363e-06, + "loss": 0.08405303955078125, + "step": 9591 + }, + { + "epoch": 0.6483709612004867, + "grad_norm": 0.6964519619941711, + "learning_rate": 8.797437848193546e-06, + "loss": 0.1334228515625, + "step": 9592 + }, + { + "epoch": 0.6484385561714209, + "grad_norm": 0.3401331305503845, + "learning_rate": 8.794433768681992e-06, + "loss": 0.06378555297851562, + "step": 9593 + }, + { + "epoch": 0.648506151142355, + "grad_norm": 0.4804578423500061, + "learning_rate": 8.791429989434327e-06, + "loss": 0.09333038330078125, + "step": 9594 + }, + { + "epoch": 0.6485737461132892, + "grad_norm": 0.33198127150535583, + "learning_rate": 8.788426510595885e-06, + "loss": 0.05199432373046875, + "step": 9595 + }, + { + "epoch": 0.6486413410842233, + "grad_norm": 0.23692253232002258, + "learning_rate": 8.785423332311998e-06, + "loss": 0.035289764404296875, + "step": 9596 + }, + { + "epoch": 0.6487089360551574, + "grad_norm": 0.6011608839035034, + "learning_rate": 8.782420454727985e-06, + "loss": 0.143829345703125, + "step": 9597 + }, + { + "epoch": 0.6487765310260917, + "grad_norm": 0.6771467924118042, + "learning_rate": 8.77941787798913e-06, + "loss": 0.1062164306640625, + "step": 9598 + }, + { + "epoch": 0.6488441259970258, + "grad_norm": 0.410273939371109, + "learning_rate": 8.776415602240724e-06, + "loss": 0.1010589599609375, + "step": 9599 + }, + { + "epoch": 0.64891172096796, + "grad_norm": 0.9409255385398865, + "learning_rate": 8.773413627628034e-06, + "loss": 0.174468994140625, + "step": 9600 + }, + { + "epoch": 0.6489793159388941, + "grad_norm": 0.9927771687507629, + "learning_rate": 8.770411954296322e-06, + "loss": 0.1896209716796875, + "step": 9601 + }, + { + "epoch": 0.6490469109098284, + "grad_norm": 0.5189913511276245, + "learning_rate": 8.767410582390817e-06, + "loss": 0.115875244140625, + "step": 9602 + }, + { + "epoch": 0.6491145058807625, + "grad_norm": 0.2361798733472824, + "learning_rate": 8.764409512056751e-06, + "loss": 0.03424072265625, + "step": 9603 + }, + { + "epoch": 0.6491821008516966, + "grad_norm": 1.0071617364883423, + "learning_rate": 8.761408743439326e-06, + "loss": 0.1437530517578125, + "step": 9604 + }, + { + "epoch": 0.6492496958226308, + "grad_norm": 0.2491907775402069, + "learning_rate": 8.758408276683745e-06, + "loss": 0.03580284118652344, + "step": 9605 + }, + { + "epoch": 0.6493172907935649, + "grad_norm": 0.45752212405204773, + "learning_rate": 8.755408111935195e-06, + "loss": 0.06647109985351562, + "step": 9606 + }, + { + "epoch": 0.6493848857644992, + "grad_norm": 0.37884703278541565, + "learning_rate": 8.752408249338823e-06, + "loss": 0.08331298828125, + "step": 9607 + }, + { + "epoch": 0.6494524807354333, + "grad_norm": 0.21622657775878906, + "learning_rate": 8.749408689039806e-06, + "loss": 0.033794403076171875, + "step": 9608 + }, + { + "epoch": 0.6495200757063675, + "grad_norm": 0.5721669793128967, + "learning_rate": 8.746409431183256e-06, + "loss": 0.1145782470703125, + "step": 9609 + }, + { + "epoch": 0.6495876706773016, + "grad_norm": 0.5118132829666138, + "learning_rate": 8.743410475914315e-06, + "loss": 0.07752227783203125, + "step": 9610 + }, + { + "epoch": 0.6496552656482357, + "grad_norm": 0.3242608308792114, + "learning_rate": 8.740411823378084e-06, + "loss": 0.04736328125, + "step": 9611 + }, + { + "epoch": 0.64972286061917, + "grad_norm": 0.26278358697891235, + "learning_rate": 8.737413473719658e-06, + "loss": 0.05100250244140625, + "step": 9612 + }, + { + "epoch": 0.6497904555901041, + "grad_norm": 0.3907698690891266, + "learning_rate": 8.734415427084114e-06, + "loss": 0.08362579345703125, + "step": 9613 + }, + { + "epoch": 0.6498580505610383, + "grad_norm": 0.28480279445648193, + "learning_rate": 8.731417683616518e-06, + "loss": 0.04987907409667969, + "step": 9614 + }, + { + "epoch": 0.6499256455319724, + "grad_norm": 0.4689905047416687, + "learning_rate": 8.728420243461912e-06, + "loss": 0.073944091796875, + "step": 9615 + }, + { + "epoch": 0.6499932405029066, + "grad_norm": 0.17234838008880615, + "learning_rate": 8.725423106765348e-06, + "loss": 0.03191375732421875, + "step": 9616 + }, + { + "epoch": 0.6500608354738407, + "grad_norm": 0.736259937286377, + "learning_rate": 8.722426273671823e-06, + "loss": 0.1094970703125, + "step": 9617 + }, + { + "epoch": 0.6501284304447749, + "grad_norm": 0.4851192831993103, + "learning_rate": 8.719429744326366e-06, + "loss": 0.08110427856445312, + "step": 9618 + }, + { + "epoch": 0.6501960254157091, + "grad_norm": 0.5692322254180908, + "learning_rate": 8.716433518873952e-06, + "loss": 0.08081817626953125, + "step": 9619 + }, + { + "epoch": 0.6502636203866432, + "grad_norm": 0.6627504229545593, + "learning_rate": 8.713437597459556e-06, + "loss": 0.10794830322265625, + "step": 9620 + }, + { + "epoch": 0.6503312153575774, + "grad_norm": 0.4437999725341797, + "learning_rate": 8.710441980228156e-06, + "loss": 0.08652496337890625, + "step": 9621 + }, + { + "epoch": 0.6503988103285115, + "grad_norm": 0.39234432578086853, + "learning_rate": 8.707446667324677e-06, + "loss": 0.0912322998046875, + "step": 9622 + }, + { + "epoch": 0.6504664052994458, + "grad_norm": 0.40958812832832336, + "learning_rate": 8.704451658894064e-06, + "loss": 0.0926513671875, + "step": 9623 + }, + { + "epoch": 0.6505340002703799, + "grad_norm": 0.4742828607559204, + "learning_rate": 8.701456955081233e-06, + "loss": 0.049053192138671875, + "step": 9624 + }, + { + "epoch": 0.650601595241314, + "grad_norm": 0.2618084251880646, + "learning_rate": 8.698462556031086e-06, + "loss": 0.046009063720703125, + "step": 9625 + }, + { + "epoch": 0.6506691902122482, + "grad_norm": 0.3168089985847473, + "learning_rate": 8.695468461888507e-06, + "loss": 0.0654296875, + "step": 9626 + }, + { + "epoch": 0.6507367851831823, + "grad_norm": 0.3183872401714325, + "learning_rate": 8.692474672798372e-06, + "loss": 0.061954498291015625, + "step": 9627 + }, + { + "epoch": 0.6508043801541166, + "grad_norm": 1.1451621055603027, + "learning_rate": 8.689481188905534e-06, + "loss": 0.21075439453125, + "step": 9628 + }, + { + "epoch": 0.6508719751250507, + "grad_norm": 0.9482662081718445, + "learning_rate": 8.68648801035485e-06, + "loss": 0.1839599609375, + "step": 9629 + }, + { + "epoch": 0.6509395700959849, + "grad_norm": 0.7047007083892822, + "learning_rate": 8.683495137291134e-06, + "loss": 0.102996826171875, + "step": 9630 + }, + { + "epoch": 0.651007165066919, + "grad_norm": 0.4026961922645569, + "learning_rate": 8.680502569859208e-06, + "loss": 0.07034683227539062, + "step": 9631 + }, + { + "epoch": 0.6510747600378531, + "grad_norm": 0.7430049180984497, + "learning_rate": 8.677510308203866e-06, + "loss": 0.130340576171875, + "step": 9632 + }, + { + "epoch": 0.6511423550087874, + "grad_norm": 0.5778307914733887, + "learning_rate": 8.674518352469888e-06, + "loss": 0.11909866333007812, + "step": 9633 + }, + { + "epoch": 0.6512099499797215, + "grad_norm": 0.9475836753845215, + "learning_rate": 8.671526702802064e-06, + "loss": 0.15105819702148438, + "step": 9634 + }, + { + "epoch": 0.6512775449506557, + "grad_norm": 0.47334024310112, + "learning_rate": 8.66853535934512e-06, + "loss": 0.10011100769042969, + "step": 9635 + }, + { + "epoch": 0.6513451399215898, + "grad_norm": 0.3317318558692932, + "learning_rate": 8.665544322243818e-06, + "loss": 0.05740165710449219, + "step": 9636 + }, + { + "epoch": 0.651412734892524, + "grad_norm": 0.33645719289779663, + "learning_rate": 8.662553591642873e-06, + "loss": 0.048980712890625, + "step": 9637 + }, + { + "epoch": 0.6514803298634582, + "grad_norm": 0.2673591077327728, + "learning_rate": 8.659563167687e-06, + "loss": 0.041412353515625, + "step": 9638 + }, + { + "epoch": 0.6515479248343923, + "grad_norm": 1.1622121334075928, + "learning_rate": 8.65657305052089e-06, + "loss": 0.1700439453125, + "step": 9639 + }, + { + "epoch": 0.6516155198053265, + "grad_norm": 1.4729344844818115, + "learning_rate": 8.653583240289227e-06, + "loss": 0.08824920654296875, + "step": 9640 + }, + { + "epoch": 0.6516831147762606, + "grad_norm": 0.30604612827301025, + "learning_rate": 8.650593737136672e-06, + "loss": 0.061473846435546875, + "step": 9641 + }, + { + "epoch": 0.6517507097471948, + "grad_norm": 0.9388854503631592, + "learning_rate": 8.64760454120788e-06, + "loss": 0.1405487060546875, + "step": 9642 + }, + { + "epoch": 0.651818304718129, + "grad_norm": 0.45601359009742737, + "learning_rate": 8.644615652647486e-06, + "loss": 0.088775634765625, + "step": 9643 + }, + { + "epoch": 0.6518858996890632, + "grad_norm": 1.0359134674072266, + "learning_rate": 8.641627071600103e-06, + "loss": 0.183990478515625, + "step": 9644 + }, + { + "epoch": 0.6519534946599973, + "grad_norm": 0.37993112206459045, + "learning_rate": 8.638638798210359e-06, + "loss": 0.09398651123046875, + "step": 9645 + }, + { + "epoch": 0.6520210896309314, + "grad_norm": 0.22601598501205444, + "learning_rate": 8.635650832622817e-06, + "loss": 0.03381538391113281, + "step": 9646 + }, + { + "epoch": 0.6520886846018656, + "grad_norm": 0.7170881628990173, + "learning_rate": 8.63266317498208e-06, + "loss": 0.1369171142578125, + "step": 9647 + }, + { + "epoch": 0.6521562795727998, + "grad_norm": 1.0295982360839844, + "learning_rate": 8.629675825432684e-06, + "loss": 0.1576385498046875, + "step": 9648 + }, + { + "epoch": 0.652223874543734, + "grad_norm": 0.9427034854888916, + "learning_rate": 8.626688784119193e-06, + "loss": 0.1080780029296875, + "step": 9649 + }, + { + "epoch": 0.6522914695146681, + "grad_norm": 0.5851935744285583, + "learning_rate": 8.623702051186136e-06, + "loss": 0.12964248657226562, + "step": 9650 + }, + { + "epoch": 0.6523590644856023, + "grad_norm": 0.3491479158401489, + "learning_rate": 8.620715626778026e-06, + "loss": 0.0768585205078125, + "step": 9651 + }, + { + "epoch": 0.6524266594565364, + "grad_norm": 0.6058950424194336, + "learning_rate": 8.617729511039368e-06, + "loss": 0.13443756103515625, + "step": 9652 + }, + { + "epoch": 0.6524942544274706, + "grad_norm": 1.2941045761108398, + "learning_rate": 8.614743704114647e-06, + "loss": 0.1682891845703125, + "step": 9653 + }, + { + "epoch": 0.6525618493984048, + "grad_norm": 0.23275156319141388, + "learning_rate": 8.611758206148334e-06, + "loss": 0.048065185546875, + "step": 9654 + }, + { + "epoch": 0.6526294443693389, + "grad_norm": 0.9688998460769653, + "learning_rate": 8.608773017284887e-06, + "loss": 0.1504974365234375, + "step": 9655 + }, + { + "epoch": 0.6526970393402731, + "grad_norm": 0.7878175377845764, + "learning_rate": 8.605788137668748e-06, + "loss": 0.1336517333984375, + "step": 9656 + }, + { + "epoch": 0.6527646343112072, + "grad_norm": 0.6809964776039124, + "learning_rate": 8.602803567444339e-06, + "loss": 0.11089324951171875, + "step": 9657 + }, + { + "epoch": 0.6528322292821415, + "grad_norm": 0.7971547245979309, + "learning_rate": 8.599819306756088e-06, + "loss": 0.1473388671875, + "step": 9658 + }, + { + "epoch": 0.6528998242530756, + "grad_norm": 0.5923384428024292, + "learning_rate": 8.59683535574837e-06, + "loss": 0.08982467651367188, + "step": 9659 + }, + { + "epoch": 0.6529674192240097, + "grad_norm": 0.9904810786247253, + "learning_rate": 8.593851714565585e-06, + "loss": 0.16748046875, + "step": 9660 + }, + { + "epoch": 0.6530350141949439, + "grad_norm": 1.1228351593017578, + "learning_rate": 8.590868383352093e-06, + "loss": 0.128082275390625, + "step": 9661 + }, + { + "epoch": 0.653102609165878, + "grad_norm": 0.5901855826377869, + "learning_rate": 8.587885362252246e-06, + "loss": 0.09526824951171875, + "step": 9662 + }, + { + "epoch": 0.6531702041368123, + "grad_norm": 0.6489430069923401, + "learning_rate": 8.584902651410382e-06, + "loss": 0.127593994140625, + "step": 9663 + }, + { + "epoch": 0.6532377991077464, + "grad_norm": 0.17515257000923157, + "learning_rate": 8.58192025097082e-06, + "loss": 0.036746978759765625, + "step": 9664 + }, + { + "epoch": 0.6533053940786806, + "grad_norm": 1.118507981300354, + "learning_rate": 8.578938161077874e-06, + "loss": 0.198333740234375, + "step": 9665 + }, + { + "epoch": 0.6533729890496147, + "grad_norm": 0.4261959493160248, + "learning_rate": 8.575956381875831e-06, + "loss": 0.065643310546875, + "step": 9666 + }, + { + "epoch": 0.6534405840205488, + "grad_norm": 0.2826012074947357, + "learning_rate": 8.572974913508967e-06, + "loss": 0.053470611572265625, + "step": 9667 + }, + { + "epoch": 0.6535081789914831, + "grad_norm": 0.21596691012382507, + "learning_rate": 8.569993756121548e-06, + "loss": 0.03841400146484375, + "step": 9668 + }, + { + "epoch": 0.6535757739624172, + "grad_norm": 0.23254674673080444, + "learning_rate": 8.567012909857819e-06, + "loss": 0.04360198974609375, + "step": 9669 + }, + { + "epoch": 0.6536433689333514, + "grad_norm": 1.5520859956741333, + "learning_rate": 8.564032374862004e-06, + "loss": 0.210906982421875, + "step": 9670 + }, + { + "epoch": 0.6537109639042855, + "grad_norm": 1.367783546447754, + "learning_rate": 8.56105215127834e-06, + "loss": 0.226470947265625, + "step": 9671 + }, + { + "epoch": 0.6537785588752196, + "grad_norm": 0.6168718934059143, + "learning_rate": 8.558072239251004e-06, + "loss": 0.1170654296875, + "step": 9672 + }, + { + "epoch": 0.6538461538461539, + "grad_norm": 0.8716182112693787, + "learning_rate": 8.555092638924203e-06, + "loss": 0.160125732421875, + "step": 9673 + }, + { + "epoch": 0.653913748817088, + "grad_norm": 0.6497346758842468, + "learning_rate": 8.5521133504421e-06, + "loss": 0.08751296997070312, + "step": 9674 + }, + { + "epoch": 0.6539813437880222, + "grad_norm": 0.8029664158821106, + "learning_rate": 8.549134373948851e-06, + "loss": 0.17486572265625, + "step": 9675 + }, + { + "epoch": 0.6540489387589563, + "grad_norm": 0.5380277037620544, + "learning_rate": 8.546155709588604e-06, + "loss": 0.07318115234375, + "step": 9676 + }, + { + "epoch": 0.6541165337298905, + "grad_norm": 0.3050127327442169, + "learning_rate": 8.54317735750547e-06, + "loss": 0.06316375732421875, + "step": 9677 + }, + { + "epoch": 0.6541841287008247, + "grad_norm": 0.5732821226119995, + "learning_rate": 8.540199317843576e-06, + "loss": 0.09568023681640625, + "step": 9678 + }, + { + "epoch": 0.6542517236717588, + "grad_norm": 0.4144614040851593, + "learning_rate": 8.53722159074701e-06, + "loss": 0.077117919921875, + "step": 9679 + }, + { + "epoch": 0.654319318642693, + "grad_norm": 0.3914666175842285, + "learning_rate": 8.534244176359855e-06, + "loss": 0.0723876953125, + "step": 9680 + }, + { + "epoch": 0.6543869136136271, + "grad_norm": 0.26806557178497314, + "learning_rate": 8.531267074826178e-06, + "loss": 0.05265045166015625, + "step": 9681 + }, + { + "epoch": 0.6544545085845613, + "grad_norm": 0.37790384888648987, + "learning_rate": 8.52829028629003e-06, + "loss": 0.06951713562011719, + "step": 9682 + }, + { + "epoch": 0.6545221035554954, + "grad_norm": 0.28793030977249146, + "learning_rate": 8.525313810895437e-06, + "loss": 0.05667877197265625, + "step": 9683 + }, + { + "epoch": 0.6545896985264297, + "grad_norm": 0.9240916967391968, + "learning_rate": 8.522337648786439e-06, + "loss": 0.1602783203125, + "step": 9684 + }, + { + "epoch": 0.6546572934973638, + "grad_norm": 0.29390090703964233, + "learning_rate": 8.519361800107019e-06, + "loss": 0.038155555725097656, + "step": 9685 + }, + { + "epoch": 0.6547248884682979, + "grad_norm": 0.9438192844390869, + "learning_rate": 8.516386265001183e-06, + "loss": 0.119964599609375, + "step": 9686 + }, + { + "epoch": 0.6547924834392321, + "grad_norm": 0.8035265207290649, + "learning_rate": 8.513411043612899e-06, + "loss": 0.13339996337890625, + "step": 9687 + }, + { + "epoch": 0.6548600784101662, + "grad_norm": 0.20252566039562225, + "learning_rate": 8.510436136086132e-06, + "loss": 0.026388168334960938, + "step": 9688 + }, + { + "epoch": 0.6549276733811005, + "grad_norm": 0.3734416365623474, + "learning_rate": 8.50746154256482e-06, + "loss": 0.05748748779296875, + "step": 9689 + }, + { + "epoch": 0.6549952683520346, + "grad_norm": 0.4268476665019989, + "learning_rate": 8.504487263192897e-06, + "loss": 0.06392288208007812, + "step": 9690 + }, + { + "epoch": 0.6550628633229688, + "grad_norm": 0.5447491407394409, + "learning_rate": 8.501513298114273e-06, + "loss": 0.11095428466796875, + "step": 9691 + }, + { + "epoch": 0.6551304582939029, + "grad_norm": 0.31690964102745056, + "learning_rate": 8.498539647472852e-06, + "loss": 0.066009521484375, + "step": 9692 + }, + { + "epoch": 0.655198053264837, + "grad_norm": 0.9013522863388062, + "learning_rate": 8.495566311412514e-06, + "loss": 0.15288543701171875, + "step": 9693 + }, + { + "epoch": 0.6552656482357713, + "grad_norm": 1.121224045753479, + "learning_rate": 8.492593290077129e-06, + "loss": 0.172027587890625, + "step": 9694 + }, + { + "epoch": 0.6553332432067054, + "grad_norm": 1.326540470123291, + "learning_rate": 8.48962058361055e-06, + "loss": 0.160614013671875, + "step": 9695 + }, + { + "epoch": 0.6554008381776396, + "grad_norm": 0.2900025248527527, + "learning_rate": 8.486648192156608e-06, + "loss": 0.047389984130859375, + "step": 9696 + }, + { + "epoch": 0.6554684331485737, + "grad_norm": 1.1139390468597412, + "learning_rate": 8.483676115859144e-06, + "loss": 0.16463470458984375, + "step": 9697 + }, + { + "epoch": 0.655536028119508, + "grad_norm": 0.39357638359069824, + "learning_rate": 8.480704354861944e-06, + "loss": 0.062530517578125, + "step": 9698 + }, + { + "epoch": 0.6556036230904421, + "grad_norm": 1.5372519493103027, + "learning_rate": 8.477732909308813e-06, + "loss": 0.17376708984375, + "step": 9699 + }, + { + "epoch": 0.6556712180613762, + "grad_norm": 0.2910815477371216, + "learning_rate": 8.474761779343535e-06, + "loss": 0.04953765869140625, + "step": 9700 + }, + { + "epoch": 0.6557388130323104, + "grad_norm": 0.35689494013786316, + "learning_rate": 8.471790965109847e-06, + "loss": 0.078643798828125, + "step": 9701 + }, + { + "epoch": 0.6558064080032445, + "grad_norm": 0.19546334445476532, + "learning_rate": 8.46882046675152e-06, + "loss": 0.02748870849609375, + "step": 9702 + }, + { + "epoch": 0.6558740029741787, + "grad_norm": 0.5223133563995361, + "learning_rate": 8.465850284412274e-06, + "loss": 0.06854438781738281, + "step": 9703 + }, + { + "epoch": 0.6559415979451129, + "grad_norm": 0.4726313352584839, + "learning_rate": 8.462880418235826e-06, + "loss": 0.09869003295898438, + "step": 9704 + }, + { + "epoch": 0.6560091929160471, + "grad_norm": 1.0600309371948242, + "learning_rate": 8.459910868365878e-06, + "loss": 0.14969635009765625, + "step": 9705 + }, + { + "epoch": 0.6560767878869812, + "grad_norm": 1.0818573236465454, + "learning_rate": 8.456941634946115e-06, + "loss": 0.1511993408203125, + "step": 9706 + }, + { + "epoch": 0.6561443828579153, + "grad_norm": 0.38311171531677246, + "learning_rate": 8.4539727181202e-06, + "loss": 0.05536651611328125, + "step": 9707 + }, + { + "epoch": 0.6562119778288495, + "grad_norm": 0.3679342567920685, + "learning_rate": 8.451004118031805e-06, + "loss": 0.057952880859375, + "step": 9708 + }, + { + "epoch": 0.6562795727997837, + "grad_norm": 0.3339264690876007, + "learning_rate": 8.448035834824548e-06, + "loss": 0.06383132934570312, + "step": 9709 + }, + { + "epoch": 0.6563471677707179, + "grad_norm": 0.6997555494308472, + "learning_rate": 8.445067868642075e-06, + "loss": 0.1540374755859375, + "step": 9710 + }, + { + "epoch": 0.656414762741652, + "grad_norm": 0.4547373950481415, + "learning_rate": 8.44210021962798e-06, + "loss": 0.0543365478515625, + "step": 9711 + }, + { + "epoch": 0.6564823577125862, + "grad_norm": 1.0810750722885132, + "learning_rate": 8.43913288792585e-06, + "loss": 0.174957275390625, + "step": 9712 + }, + { + "epoch": 0.6565499526835203, + "grad_norm": 0.6183560490608215, + "learning_rate": 8.436165873679286e-06, + "loss": 0.12149810791015625, + "step": 9713 + }, + { + "epoch": 0.6566175476544545, + "grad_norm": 0.5714925527572632, + "learning_rate": 8.433199177031825e-06, + "loss": 0.08661270141601562, + "step": 9714 + }, + { + "epoch": 0.6566851426253887, + "grad_norm": 1.812876582145691, + "learning_rate": 8.43023279812703e-06, + "loss": 0.247283935546875, + "step": 9715 + }, + { + "epoch": 0.6567527375963228, + "grad_norm": 0.46265360713005066, + "learning_rate": 8.427266737108432e-06, + "loss": 0.1086273193359375, + "step": 9716 + }, + { + "epoch": 0.656820332567257, + "grad_norm": 0.5381368398666382, + "learning_rate": 8.424300994119544e-06, + "loss": 0.11856460571289062, + "step": 9717 + }, + { + "epoch": 0.6568879275381911, + "grad_norm": 0.34136566519737244, + "learning_rate": 8.421335569303867e-06, + "loss": 0.0596923828125, + "step": 9718 + }, + { + "epoch": 0.6569555225091254, + "grad_norm": 1.0918000936508179, + "learning_rate": 8.41837046280489e-06, + "loss": 0.213531494140625, + "step": 9719 + }, + { + "epoch": 0.6570231174800595, + "grad_norm": 1.055883765220642, + "learning_rate": 8.415405674766071e-06, + "loss": 0.1450653076171875, + "step": 9720 + }, + { + "epoch": 0.6570907124509936, + "grad_norm": 0.32197698950767517, + "learning_rate": 8.412441205330888e-06, + "loss": 0.070159912109375, + "step": 9721 + }, + { + "epoch": 0.6571583074219278, + "grad_norm": 0.30763402581214905, + "learning_rate": 8.409477054642756e-06, + "loss": 0.04772186279296875, + "step": 9722 + }, + { + "epoch": 0.6572259023928619, + "grad_norm": 0.4781642556190491, + "learning_rate": 8.406513222845121e-06, + "loss": 0.05124092102050781, + "step": 9723 + }, + { + "epoch": 0.6572934973637962, + "grad_norm": 0.20582318305969238, + "learning_rate": 8.403549710081375e-06, + "loss": 0.035160064697265625, + "step": 9724 + }, + { + "epoch": 0.6573610923347303, + "grad_norm": 0.21237781643867493, + "learning_rate": 8.400586516494913e-06, + "loss": 0.02954864501953125, + "step": 9725 + }, + { + "epoch": 0.6574286873056645, + "grad_norm": 0.8398540616035461, + "learning_rate": 8.397623642229126e-06, + "loss": 0.11435794830322266, + "step": 9726 + }, + { + "epoch": 0.6574962822765986, + "grad_norm": 0.2024030089378357, + "learning_rate": 8.394661087427355e-06, + "loss": 0.035741329193115234, + "step": 9727 + }, + { + "epoch": 0.6575638772475327, + "grad_norm": 1.0688468217849731, + "learning_rate": 8.391698852232965e-06, + "loss": 0.122589111328125, + "step": 9728 + }, + { + "epoch": 0.657631472218467, + "grad_norm": 0.7610977292060852, + "learning_rate": 8.388736936789281e-06, + "loss": 0.15850830078125, + "step": 9729 + }, + { + "epoch": 0.6576990671894011, + "grad_norm": 0.42936813831329346, + "learning_rate": 8.38577534123962e-06, + "loss": 0.06646728515625, + "step": 9730 + }, + { + "epoch": 0.6577666621603353, + "grad_norm": 0.8797276616096497, + "learning_rate": 8.38281406572728e-06, + "loss": 0.15030670166015625, + "step": 9731 + }, + { + "epoch": 0.6578342571312694, + "grad_norm": 0.8201127052307129, + "learning_rate": 8.37985311039555e-06, + "loss": 0.17218017578125, + "step": 9732 + }, + { + "epoch": 0.6579018521022036, + "grad_norm": 0.29539698362350464, + "learning_rate": 8.37689247538769e-06, + "loss": 0.038547515869140625, + "step": 9733 + }, + { + "epoch": 0.6579694470731378, + "grad_norm": 0.4563271701335907, + "learning_rate": 8.37393216084697e-06, + "loss": 0.08807373046875, + "step": 9734 + }, + { + "epoch": 0.6580370420440719, + "grad_norm": 0.3869382441043854, + "learning_rate": 8.370972166916616e-06, + "loss": 0.08316802978515625, + "step": 9735 + }, + { + "epoch": 0.6581046370150061, + "grad_norm": 0.3290581703186035, + "learning_rate": 8.368012493739847e-06, + "loss": 0.049072265625, + "step": 9736 + }, + { + "epoch": 0.6581722319859402, + "grad_norm": 0.6460148692131042, + "learning_rate": 8.36505314145989e-06, + "loss": 0.14447021484375, + "step": 9737 + }, + { + "epoch": 0.6582398269568744, + "grad_norm": 0.40693312883377075, + "learning_rate": 8.362094110219911e-06, + "loss": 0.08038330078125, + "step": 9738 + }, + { + "epoch": 0.6583074219278086, + "grad_norm": 0.9671489596366882, + "learning_rate": 8.359135400163111e-06, + "loss": 0.13348388671875, + "step": 9739 + }, + { + "epoch": 0.6583750168987428, + "grad_norm": 0.7563462257385254, + "learning_rate": 8.35617701143263e-06, + "loss": 0.09554672241210938, + "step": 9740 + }, + { + "epoch": 0.6584426118696769, + "grad_norm": 0.2629068195819855, + "learning_rate": 8.353218944171627e-06, + "loss": 0.053760528564453125, + "step": 9741 + }, + { + "epoch": 0.658510206840611, + "grad_norm": 0.3070227801799774, + "learning_rate": 8.350261198523229e-06, + "loss": 0.05318450927734375, + "step": 9742 + }, + { + "epoch": 0.6585778018115452, + "grad_norm": 0.9133813977241516, + "learning_rate": 8.347303774630547e-06, + "loss": 0.1474609375, + "step": 9743 + }, + { + "epoch": 0.6586453967824794, + "grad_norm": 0.5195844173431396, + "learning_rate": 8.344346672636681e-06, + "loss": 0.11855316162109375, + "step": 9744 + }, + { + "epoch": 0.6587129917534136, + "grad_norm": 0.22927410900592804, + "learning_rate": 8.341389892684716e-06, + "loss": 0.040340423583984375, + "step": 9745 + }, + { + "epoch": 0.6587805867243477, + "grad_norm": 0.5475461483001709, + "learning_rate": 8.338433434917708e-06, + "loss": 0.098724365234375, + "step": 9746 + }, + { + "epoch": 0.6588481816952819, + "grad_norm": 1.1939043998718262, + "learning_rate": 8.335477299478733e-06, + "loss": 0.176910400390625, + "step": 9747 + }, + { + "epoch": 0.658915776666216, + "grad_norm": 0.18299493193626404, + "learning_rate": 8.332521486510806e-06, + "loss": 0.030366897583007812, + "step": 9748 + }, + { + "epoch": 0.6589833716371502, + "grad_norm": 0.3894447684288025, + "learning_rate": 8.329565996156946e-06, + "loss": 0.06195831298828125, + "step": 9749 + }, + { + "epoch": 0.6590509666080844, + "grad_norm": 1.2024919986724854, + "learning_rate": 8.326610828560182e-06, + "loss": 0.13510894775390625, + "step": 9750 + }, + { + "epoch": 0.6591185615790185, + "grad_norm": 0.7270756363868713, + "learning_rate": 8.323655983863471e-06, + "loss": 0.17791748046875, + "step": 9751 + }, + { + "epoch": 0.6591861565499527, + "grad_norm": 0.5409939885139465, + "learning_rate": 8.320701462209812e-06, + "loss": 0.0745697021484375, + "step": 9752 + }, + { + "epoch": 0.6592537515208868, + "grad_norm": 0.5184230208396912, + "learning_rate": 8.317747263742155e-06, + "loss": 0.08774566650390625, + "step": 9753 + }, + { + "epoch": 0.6593213464918211, + "grad_norm": 0.8744737505912781, + "learning_rate": 8.314793388603439e-06, + "loss": 0.11316680908203125, + "step": 9754 + }, + { + "epoch": 0.6593889414627552, + "grad_norm": 0.2914629876613617, + "learning_rate": 8.311839836936596e-06, + "loss": 0.041717529296875, + "step": 9755 + }, + { + "epoch": 0.6594565364336893, + "grad_norm": 0.8129464387893677, + "learning_rate": 8.308886608884534e-06, + "loss": 0.12621307373046875, + "step": 9756 + }, + { + "epoch": 0.6595241314046235, + "grad_norm": 0.22493726015090942, + "learning_rate": 8.305933704590149e-06, + "loss": 0.037303924560546875, + "step": 9757 + }, + { + "epoch": 0.6595917263755576, + "grad_norm": 0.6829808354377747, + "learning_rate": 8.302981124196322e-06, + "loss": 0.1103057861328125, + "step": 9758 + }, + { + "epoch": 0.6596593213464919, + "grad_norm": 1.0903209447860718, + "learning_rate": 8.300028867845919e-06, + "loss": 0.10465240478515625, + "step": 9759 + }, + { + "epoch": 0.659726916317426, + "grad_norm": 0.19415688514709473, + "learning_rate": 8.297076935681782e-06, + "loss": 0.02777099609375, + "step": 9760 + }, + { + "epoch": 0.6597945112883602, + "grad_norm": 1.0116130113601685, + "learning_rate": 8.29412532784675e-06, + "loss": 0.209381103515625, + "step": 9761 + }, + { + "epoch": 0.6598621062592943, + "grad_norm": 0.24718764424324036, + "learning_rate": 8.291174044483631e-06, + "loss": 0.03865814208984375, + "step": 9762 + }, + { + "epoch": 0.6599297012302284, + "grad_norm": 0.5573875308036804, + "learning_rate": 8.288223085735248e-06, + "loss": 0.07733154296875, + "step": 9763 + }, + { + "epoch": 0.6599972962011627, + "grad_norm": 0.8168099522590637, + "learning_rate": 8.285272451744357e-06, + "loss": 0.11292457580566406, + "step": 9764 + }, + { + "epoch": 0.6600648911720968, + "grad_norm": 0.2558348476886749, + "learning_rate": 8.28232214265375e-06, + "loss": 0.04398155212402344, + "step": 9765 + }, + { + "epoch": 0.660132486143031, + "grad_norm": 0.367768794298172, + "learning_rate": 8.279372158606176e-06, + "loss": 0.07534027099609375, + "step": 9766 + }, + { + "epoch": 0.6602000811139651, + "grad_norm": 0.3973563313484192, + "learning_rate": 8.276422499744371e-06, + "loss": 0.055812835693359375, + "step": 9767 + }, + { + "epoch": 0.6602676760848993, + "grad_norm": 0.46612417697906494, + "learning_rate": 8.273473166211059e-06, + "loss": 0.107391357421875, + "step": 9768 + }, + { + "epoch": 0.6603352710558335, + "grad_norm": 1.1420780420303345, + "learning_rate": 8.270524158148946e-06, + "loss": 0.215087890625, + "step": 9769 + }, + { + "epoch": 0.6604028660267676, + "grad_norm": 0.2524304687976837, + "learning_rate": 8.267575475700729e-06, + "loss": 0.04913330078125, + "step": 9770 + }, + { + "epoch": 0.6604704609977018, + "grad_norm": 0.5050324201583862, + "learning_rate": 8.264627119009074e-06, + "loss": 0.10219573974609375, + "step": 9771 + }, + { + "epoch": 0.6605380559686359, + "grad_norm": 2.6781742572784424, + "learning_rate": 8.261679088216645e-06, + "loss": 0.2490234375, + "step": 9772 + }, + { + "epoch": 0.6606056509395701, + "grad_norm": 0.2435009926557541, + "learning_rate": 8.25873138346609e-06, + "loss": 0.05541229248046875, + "step": 9773 + }, + { + "epoch": 0.6606732459105042, + "grad_norm": 0.961395263671875, + "learning_rate": 8.255784004900034e-06, + "loss": 0.1480712890625, + "step": 9774 + }, + { + "epoch": 0.6607408408814385, + "grad_norm": 0.2817211449146271, + "learning_rate": 8.252836952661083e-06, + "loss": 0.04721832275390625, + "step": 9775 + }, + { + "epoch": 0.6608084358523726, + "grad_norm": 0.9543573260307312, + "learning_rate": 8.249890226891852e-06, + "loss": 0.1617889404296875, + "step": 9776 + }, + { + "epoch": 0.6608760308233067, + "grad_norm": 0.25988057255744934, + "learning_rate": 8.246943827734899e-06, + "loss": 0.050811767578125, + "step": 9777 + }, + { + "epoch": 0.6609436257942409, + "grad_norm": 0.30485042929649353, + "learning_rate": 8.243997755332806e-06, + "loss": 0.05937957763671875, + "step": 9778 + }, + { + "epoch": 0.661011220765175, + "grad_norm": 1.0612722635269165, + "learning_rate": 8.241052009828118e-06, + "loss": 0.210662841796875, + "step": 9779 + }, + { + "epoch": 0.6610788157361093, + "grad_norm": 0.71140056848526, + "learning_rate": 8.238106591363365e-06, + "loss": 0.09720230102539062, + "step": 9780 + }, + { + "epoch": 0.6611464107070434, + "grad_norm": 0.7326464653015137, + "learning_rate": 8.235161500081068e-06, + "loss": 0.128326416015625, + "step": 9781 + }, + { + "epoch": 0.6612140056779776, + "grad_norm": 1.0815707445144653, + "learning_rate": 8.232216736123728e-06, + "loss": 0.18914794921875, + "step": 9782 + }, + { + "epoch": 0.6612816006489117, + "grad_norm": 0.28380855917930603, + "learning_rate": 8.22927229963383e-06, + "loss": 0.04253387451171875, + "step": 9783 + }, + { + "epoch": 0.6613491956198458, + "grad_norm": 0.529674768447876, + "learning_rate": 8.226328190753847e-06, + "loss": 0.09128570556640625, + "step": 9784 + }, + { + "epoch": 0.6614167905907801, + "grad_norm": 0.8251259922981262, + "learning_rate": 8.223384409626227e-06, + "loss": 0.170196533203125, + "step": 9785 + }, + { + "epoch": 0.6614843855617142, + "grad_norm": 0.3559858202934265, + "learning_rate": 8.220440956393416e-06, + "loss": 0.07234954833984375, + "step": 9786 + }, + { + "epoch": 0.6615519805326484, + "grad_norm": 1.2326463460922241, + "learning_rate": 8.217497831197833e-06, + "loss": 0.243377685546875, + "step": 9787 + }, + { + "epoch": 0.6616195755035825, + "grad_norm": 0.4090172350406647, + "learning_rate": 8.214555034181877e-06, + "loss": 0.0631561279296875, + "step": 9788 + }, + { + "epoch": 0.6616871704745168, + "grad_norm": 0.5870327949523926, + "learning_rate": 8.21161256548796e-06, + "loss": 0.11284255981445312, + "step": 9789 + }, + { + "epoch": 0.6617547654454509, + "grad_norm": 0.9595695734024048, + "learning_rate": 8.20867042525843e-06, + "loss": 0.180145263671875, + "step": 9790 + }, + { + "epoch": 0.661822360416385, + "grad_norm": 0.5982598066329956, + "learning_rate": 8.205728613635668e-06, + "loss": 0.12207794189453125, + "step": 9791 + }, + { + "epoch": 0.6618899553873192, + "grad_norm": 0.9584438800811768, + "learning_rate": 8.202787130762007e-06, + "loss": 0.13875198364257812, + "step": 9792 + }, + { + "epoch": 0.6619575503582533, + "grad_norm": 0.7089064121246338, + "learning_rate": 8.199845976779779e-06, + "loss": 0.1424713134765625, + "step": 9793 + }, + { + "epoch": 0.6620251453291875, + "grad_norm": 1.161237359046936, + "learning_rate": 8.196905151831287e-06, + "loss": 0.152587890625, + "step": 9794 + }, + { + "epoch": 0.6620927403001217, + "grad_norm": 0.33790385723114014, + "learning_rate": 8.193964656058837e-06, + "loss": 0.06463623046875, + "step": 9795 + }, + { + "epoch": 0.6621603352710559, + "grad_norm": 0.32355818152427673, + "learning_rate": 8.191024489604702e-06, + "loss": 0.0601806640625, + "step": 9796 + }, + { + "epoch": 0.66222793024199, + "grad_norm": 1.1928833723068237, + "learning_rate": 8.188084652611146e-06, + "loss": 0.16681671142578125, + "step": 9797 + }, + { + "epoch": 0.6622955252129241, + "grad_norm": 1.1443322896957397, + "learning_rate": 8.185145145220414e-06, + "loss": 0.17351722717285156, + "step": 9798 + }, + { + "epoch": 0.6623631201838583, + "grad_norm": 0.9634934067726135, + "learning_rate": 8.18220596757474e-06, + "loss": 0.15781021118164062, + "step": 9799 + }, + { + "epoch": 0.6624307151547925, + "grad_norm": 0.5966423153877258, + "learning_rate": 8.17926711981635e-06, + "loss": 0.11344146728515625, + "step": 9800 + }, + { + "epoch": 0.6624983101257267, + "grad_norm": 0.9625993967056274, + "learning_rate": 8.176328602087422e-06, + "loss": 0.1121826171875, + "step": 9801 + }, + { + "epoch": 0.6625659050966608, + "grad_norm": 0.9504796266555786, + "learning_rate": 8.173390414530167e-06, + "loss": 0.175262451171875, + "step": 9802 + }, + { + "epoch": 0.6626335000675949, + "grad_norm": 0.3382522165775299, + "learning_rate": 8.170452557286725e-06, + "loss": 0.069610595703125, + "step": 9803 + }, + { + "epoch": 0.6627010950385291, + "grad_norm": 0.8196136951446533, + "learning_rate": 8.167515030499265e-06, + "loss": 0.1092681884765625, + "step": 9804 + }, + { + "epoch": 0.6627686900094633, + "grad_norm": 0.4660516381263733, + "learning_rate": 8.164577834309926e-06, + "loss": 0.0858154296875, + "step": 9805 + }, + { + "epoch": 0.6628362849803975, + "grad_norm": 0.20316413044929504, + "learning_rate": 8.161640968860808e-06, + "loss": 0.03415679931640625, + "step": 9806 + }, + { + "epoch": 0.6629038799513316, + "grad_norm": 0.7521646618843079, + "learning_rate": 8.158704434294034e-06, + "loss": 0.160797119140625, + "step": 9807 + }, + { + "epoch": 0.6629714749222658, + "grad_norm": 0.4627911448478699, + "learning_rate": 8.155768230751685e-06, + "loss": 0.0944061279296875, + "step": 9808 + }, + { + "epoch": 0.6630390698931999, + "grad_norm": 0.7695252299308777, + "learning_rate": 8.152832358375832e-06, + "loss": 0.140533447265625, + "step": 9809 + }, + { + "epoch": 0.663106664864134, + "grad_norm": 0.737970232963562, + "learning_rate": 8.14989681730853e-06, + "loss": 0.11220932006835938, + "step": 9810 + }, + { + "epoch": 0.6631742598350683, + "grad_norm": 0.643197774887085, + "learning_rate": 8.146961607691823e-06, + "loss": 0.123382568359375, + "step": 9811 + }, + { + "epoch": 0.6632418548060024, + "grad_norm": 0.9374090433120728, + "learning_rate": 8.144026729667725e-06, + "loss": 0.20025634765625, + "step": 9812 + }, + { + "epoch": 0.6633094497769366, + "grad_norm": 0.29885292053222656, + "learning_rate": 8.141092183378263e-06, + "loss": 0.057125091552734375, + "step": 9813 + }, + { + "epoch": 0.6633770447478707, + "grad_norm": 0.9844199419021606, + "learning_rate": 8.138157968965405e-06, + "loss": 0.1483001708984375, + "step": 9814 + }, + { + "epoch": 0.663444639718805, + "grad_norm": 0.4438461661338806, + "learning_rate": 8.135224086571148e-06, + "loss": 0.085205078125, + "step": 9815 + }, + { + "epoch": 0.6635122346897391, + "grad_norm": 0.8500933051109314, + "learning_rate": 8.132290536337431e-06, + "loss": 0.15525054931640625, + "step": 9816 + }, + { + "epoch": 0.6635798296606732, + "grad_norm": 1.0959874391555786, + "learning_rate": 8.129357318406213e-06, + "loss": 0.1842041015625, + "step": 9817 + }, + { + "epoch": 0.6636474246316074, + "grad_norm": 0.3730488717556, + "learning_rate": 8.126424432919422e-06, + "loss": 0.0673370361328125, + "step": 9818 + }, + { + "epoch": 0.6637150196025415, + "grad_norm": 1.1483696699142456, + "learning_rate": 8.123491880018951e-06, + "loss": 0.166168212890625, + "step": 9819 + }, + { + "epoch": 0.6637826145734758, + "grad_norm": 0.8090205788612366, + "learning_rate": 8.120559659846714e-06, + "loss": 0.1430511474609375, + "step": 9820 + }, + { + "epoch": 0.6638502095444099, + "grad_norm": 0.3531555235385895, + "learning_rate": 8.117627772544582e-06, + "loss": 0.07448577880859375, + "step": 9821 + }, + { + "epoch": 0.6639178045153441, + "grad_norm": 1.848091721534729, + "learning_rate": 8.11469621825442e-06, + "loss": 0.1852264404296875, + "step": 9822 + }, + { + "epoch": 0.6639853994862782, + "grad_norm": 0.4878009855747223, + "learning_rate": 8.111764997118076e-06, + "loss": 0.0871124267578125, + "step": 9823 + }, + { + "epoch": 0.6640529944572123, + "grad_norm": 0.8349047303199768, + "learning_rate": 8.108834109277376e-06, + "loss": 0.13360595703125, + "step": 9824 + }, + { + "epoch": 0.6641205894281466, + "grad_norm": 0.48452162742614746, + "learning_rate": 8.105903554874133e-06, + "loss": 0.1030731201171875, + "step": 9825 + }, + { + "epoch": 0.6641881843990807, + "grad_norm": 1.2320460081100464, + "learning_rate": 8.102973334050162e-06, + "loss": 0.1521148681640625, + "step": 9826 + }, + { + "epoch": 0.6642557793700149, + "grad_norm": 0.7203375101089478, + "learning_rate": 8.10004344694722e-06, + "loss": 0.119720458984375, + "step": 9827 + }, + { + "epoch": 0.664323374340949, + "grad_norm": 0.2929922342300415, + "learning_rate": 8.097113893707095e-06, + "loss": 0.0711822509765625, + "step": 9828 + }, + { + "epoch": 0.6643909693118832, + "grad_norm": 0.310009628534317, + "learning_rate": 8.094184674471534e-06, + "loss": 0.05789947509765625, + "step": 9829 + }, + { + "epoch": 0.6644585642828174, + "grad_norm": 0.18958735466003418, + "learning_rate": 8.09125578938225e-06, + "loss": 0.039844512939453125, + "step": 9830 + }, + { + "epoch": 0.6645261592537515, + "grad_norm": 1.1728219985961914, + "learning_rate": 8.088327238580991e-06, + "loss": 0.2320404052734375, + "step": 9831 + }, + { + "epoch": 0.6645937542246857, + "grad_norm": 0.8090394139289856, + "learning_rate": 8.08539902220943e-06, + "loss": 0.1227264404296875, + "step": 9832 + }, + { + "epoch": 0.6646613491956198, + "grad_norm": 0.8129727840423584, + "learning_rate": 8.082471140409272e-06, + "loss": 0.152099609375, + "step": 9833 + }, + { + "epoch": 0.664728944166554, + "grad_norm": 0.6724995374679565, + "learning_rate": 8.079543593322182e-06, + "loss": 0.104522705078125, + "step": 9834 + }, + { + "epoch": 0.6647965391374882, + "grad_norm": 1.0276341438293457, + "learning_rate": 8.076616381089808e-06, + "loss": 0.1993408203125, + "step": 9835 + }, + { + "epoch": 0.6648641341084224, + "grad_norm": 0.6203305125236511, + "learning_rate": 8.07368950385379e-06, + "loss": 0.10186767578125, + "step": 9836 + }, + { + "epoch": 0.6649317290793565, + "grad_norm": 0.9537340402603149, + "learning_rate": 8.070762961755747e-06, + "loss": 0.158447265625, + "step": 9837 + }, + { + "epoch": 0.6649993240502906, + "grad_norm": 0.24404790997505188, + "learning_rate": 8.067836754937282e-06, + "loss": 0.0425567626953125, + "step": 9838 + }, + { + "epoch": 0.6650669190212248, + "grad_norm": 0.283121794462204, + "learning_rate": 8.064910883539995e-06, + "loss": 0.050273895263671875, + "step": 9839 + }, + { + "epoch": 0.665134513992159, + "grad_norm": 1.3973488807678223, + "learning_rate": 8.061985347705443e-06, + "loss": 0.1568450927734375, + "step": 9840 + }, + { + "epoch": 0.6652021089630932, + "grad_norm": 0.49451756477355957, + "learning_rate": 8.05906014757518e-06, + "loss": 0.1233978271484375, + "step": 9841 + }, + { + "epoch": 0.6652697039340273, + "grad_norm": 0.3385424017906189, + "learning_rate": 8.056135283290765e-06, + "loss": 0.07276153564453125, + "step": 9842 + }, + { + "epoch": 0.6653372989049615, + "grad_norm": 0.781506359577179, + "learning_rate": 8.053210754993697e-06, + "loss": 0.1373758316040039, + "step": 9843 + }, + { + "epoch": 0.6654048938758956, + "grad_norm": 0.2573831379413605, + "learning_rate": 8.050286562825499e-06, + "loss": 0.05253028869628906, + "step": 9844 + }, + { + "epoch": 0.6654724888468297, + "grad_norm": 1.082313895225525, + "learning_rate": 8.04736270692766e-06, + "loss": 0.16135787963867188, + "step": 9845 + }, + { + "epoch": 0.665540083817764, + "grad_norm": 0.6686891317367554, + "learning_rate": 8.044439187441648e-06, + "loss": 0.197296142578125, + "step": 9846 + }, + { + "epoch": 0.6656076787886981, + "grad_norm": 1.15994131565094, + "learning_rate": 8.041516004508924e-06, + "loss": 0.16641616821289062, + "step": 9847 + }, + { + "epoch": 0.6656752737596323, + "grad_norm": 0.3088085949420929, + "learning_rate": 8.038593158270932e-06, + "loss": 0.042385101318359375, + "step": 9848 + }, + { + "epoch": 0.6657428687305664, + "grad_norm": 0.3562376797199249, + "learning_rate": 8.035670648869094e-06, + "loss": 0.0648651123046875, + "step": 9849 + }, + { + "epoch": 0.6658104637015007, + "grad_norm": 0.597653865814209, + "learning_rate": 8.032748476444821e-06, + "loss": 0.0679779052734375, + "step": 9850 + }, + { + "epoch": 0.6658780586724348, + "grad_norm": 0.8119103908538818, + "learning_rate": 8.0298266411395e-06, + "loss": 0.1787109375, + "step": 9851 + }, + { + "epoch": 0.6659456536433689, + "grad_norm": 0.6502707600593567, + "learning_rate": 8.026905143094525e-06, + "loss": 0.1309967041015625, + "step": 9852 + }, + { + "epoch": 0.6660132486143031, + "grad_norm": 0.2706250846385956, + "learning_rate": 8.023983982451238e-06, + "loss": 0.026346206665039062, + "step": 9853 + }, + { + "epoch": 0.6660808435852372, + "grad_norm": 0.9067177772521973, + "learning_rate": 8.021063159350986e-06, + "loss": 0.1611480712890625, + "step": 9854 + }, + { + "epoch": 0.6661484385561715, + "grad_norm": 0.44001108407974243, + "learning_rate": 8.018142673935107e-06, + "loss": 0.07917022705078125, + "step": 9855 + }, + { + "epoch": 0.6662160335271056, + "grad_norm": 0.8243155479431152, + "learning_rate": 8.015222526344897e-06, + "loss": 0.1235809326171875, + "step": 9856 + }, + { + "epoch": 0.6662836284980398, + "grad_norm": 0.3811075687408447, + "learning_rate": 8.012302716721663e-06, + "loss": 0.07690048217773438, + "step": 9857 + }, + { + "epoch": 0.6663512234689739, + "grad_norm": 0.4654560089111328, + "learning_rate": 8.00938324520668e-06, + "loss": 0.0671234130859375, + "step": 9858 + }, + { + "epoch": 0.666418818439908, + "grad_norm": 0.4482252895832062, + "learning_rate": 8.006464111941211e-06, + "loss": 0.1200103759765625, + "step": 9859 + }, + { + "epoch": 0.6664864134108422, + "grad_norm": 0.49144425988197327, + "learning_rate": 8.003545317066501e-06, + "loss": 0.093414306640625, + "step": 9860 + }, + { + "epoch": 0.6665540083817764, + "grad_norm": 0.37927114963531494, + "learning_rate": 8.000626860723778e-06, + "loss": 0.0724639892578125, + "step": 9861 + }, + { + "epoch": 0.6666216033527106, + "grad_norm": 0.4207252860069275, + "learning_rate": 7.997708743054254e-06, + "loss": 0.0807647705078125, + "step": 9862 + }, + { + "epoch": 0.6666891983236447, + "grad_norm": 0.5563865303993225, + "learning_rate": 7.99479096419913e-06, + "loss": 0.09458160400390625, + "step": 9863 + }, + { + "epoch": 0.6667567932945789, + "grad_norm": 0.4277926981449127, + "learning_rate": 7.991873524299583e-06, + "loss": 0.0981903076171875, + "step": 9864 + }, + { + "epoch": 0.666824388265513, + "grad_norm": 0.5641177296638489, + "learning_rate": 7.98895642349678e-06, + "loss": 0.0754852294921875, + "step": 9865 + }, + { + "epoch": 0.6668919832364472, + "grad_norm": 0.348034530878067, + "learning_rate": 7.986039661931865e-06, + "loss": 0.051853179931640625, + "step": 9866 + }, + { + "epoch": 0.6669595782073814, + "grad_norm": 0.6825078725814819, + "learning_rate": 7.983123239745965e-06, + "loss": 0.15142822265625, + "step": 9867 + }, + { + "epoch": 0.6670271731783155, + "grad_norm": 0.9883996248245239, + "learning_rate": 7.980207157080211e-06, + "loss": 0.204010009765625, + "step": 9868 + }, + { + "epoch": 0.6670947681492497, + "grad_norm": 0.7080321907997131, + "learning_rate": 7.977291414075681e-06, + "loss": 0.0942840576171875, + "step": 9869 + }, + { + "epoch": 0.6671623631201838, + "grad_norm": 0.5982359051704407, + "learning_rate": 7.974376010873469e-06, + "loss": 0.06909561157226562, + "step": 9870 + }, + { + "epoch": 0.6672299580911181, + "grad_norm": 0.2888492941856384, + "learning_rate": 7.971460947614639e-06, + "loss": 0.04569244384765625, + "step": 9871 + }, + { + "epoch": 0.6672975530620522, + "grad_norm": 0.9991692304611206, + "learning_rate": 7.968546224440239e-06, + "loss": 0.177154541015625, + "step": 9872 + }, + { + "epoch": 0.6673651480329863, + "grad_norm": 0.40598738193511963, + "learning_rate": 7.9656318414913e-06, + "loss": 0.08270263671875, + "step": 9873 + }, + { + "epoch": 0.6674327430039205, + "grad_norm": 0.40162840485572815, + "learning_rate": 7.962717798908839e-06, + "loss": 0.07650375366210938, + "step": 9874 + }, + { + "epoch": 0.6675003379748546, + "grad_norm": 0.6372005343437195, + "learning_rate": 7.959804096833854e-06, + "loss": 0.10234832763671875, + "step": 9875 + }, + { + "epoch": 0.6675679329457889, + "grad_norm": 0.7234076857566833, + "learning_rate": 7.956890735407331e-06, + "loss": 0.10559844970703125, + "step": 9876 + }, + { + "epoch": 0.667635527916723, + "grad_norm": 0.4905923008918762, + "learning_rate": 7.953977714770236e-06, + "loss": 0.07584095001220703, + "step": 9877 + }, + { + "epoch": 0.6677031228876572, + "grad_norm": 0.22894977033138275, + "learning_rate": 7.951065035063515e-06, + "loss": 0.0277099609375, + "step": 9878 + }, + { + "epoch": 0.6677707178585913, + "grad_norm": 0.590185284614563, + "learning_rate": 7.948152696428106e-06, + "loss": 0.10428619384765625, + "step": 9879 + }, + { + "epoch": 0.6678383128295254, + "grad_norm": 0.5173256397247314, + "learning_rate": 7.945240699004918e-06, + "loss": 0.104888916015625, + "step": 9880 + }, + { + "epoch": 0.6679059078004597, + "grad_norm": 1.0524871349334717, + "learning_rate": 7.94232904293487e-06, + "loss": 0.120513916015625, + "step": 9881 + }, + { + "epoch": 0.6679735027713938, + "grad_norm": 0.553532600402832, + "learning_rate": 7.939417728358822e-06, + "loss": 0.11299896240234375, + "step": 9882 + }, + { + "epoch": 0.668041097742328, + "grad_norm": 0.4309335947036743, + "learning_rate": 7.936506755417661e-06, + "loss": 0.06425094604492188, + "step": 9883 + }, + { + "epoch": 0.6681086927132621, + "grad_norm": 0.319267600774765, + "learning_rate": 7.933596124252227e-06, + "loss": 0.051578521728515625, + "step": 9884 + }, + { + "epoch": 0.6681762876841963, + "grad_norm": 0.688642680644989, + "learning_rate": 7.930685835003362e-06, + "loss": 0.15313720703125, + "step": 9885 + }, + { + "epoch": 0.6682438826551305, + "grad_norm": 0.21445876359939575, + "learning_rate": 7.927775887811879e-06, + "loss": 0.03426361083984375, + "step": 9886 + }, + { + "epoch": 0.6683114776260646, + "grad_norm": 0.6968308091163635, + "learning_rate": 7.92486628281858e-06, + "loss": 0.09227561950683594, + "step": 9887 + }, + { + "epoch": 0.6683790725969988, + "grad_norm": 0.31652718782424927, + "learning_rate": 7.921957020164248e-06, + "loss": 0.0600128173828125, + "step": 9888 + }, + { + "epoch": 0.6684466675679329, + "grad_norm": 0.8447142839431763, + "learning_rate": 7.919048099989656e-06, + "loss": 0.1444854736328125, + "step": 9889 + }, + { + "epoch": 0.6685142625388671, + "grad_norm": 0.8120952844619751, + "learning_rate": 7.916139522435551e-06, + "loss": 0.11148834228515625, + "step": 9890 + }, + { + "epoch": 0.6685818575098013, + "grad_norm": 1.0646382570266724, + "learning_rate": 7.913231287642664e-06, + "loss": 0.182403564453125, + "step": 9891 + }, + { + "epoch": 0.6686494524807355, + "grad_norm": 0.3081199526786804, + "learning_rate": 7.910323395751732e-06, + "loss": 0.04844474792480469, + "step": 9892 + }, + { + "epoch": 0.6687170474516696, + "grad_norm": 0.22611203789710999, + "learning_rate": 7.907415846903433e-06, + "loss": 0.019565582275390625, + "step": 9893 + }, + { + "epoch": 0.6687846424226037, + "grad_norm": 0.8390123248100281, + "learning_rate": 7.904508641238475e-06, + "loss": 0.08240509033203125, + "step": 9894 + }, + { + "epoch": 0.6688522373935379, + "grad_norm": 1.5076946020126343, + "learning_rate": 7.901601778897502e-06, + "loss": 0.190704345703125, + "step": 9895 + }, + { + "epoch": 0.6689198323644721, + "grad_norm": 0.9192010760307312, + "learning_rate": 7.898695260021187e-06, + "loss": 0.169952392578125, + "step": 9896 + }, + { + "epoch": 0.6689874273354063, + "grad_norm": 1.2189120054244995, + "learning_rate": 7.895789084750159e-06, + "loss": 0.17779541015625, + "step": 9897 + }, + { + "epoch": 0.6690550223063404, + "grad_norm": 0.6582261323928833, + "learning_rate": 7.892883253225035e-06, + "loss": 0.1430511474609375, + "step": 9898 + }, + { + "epoch": 0.6691226172772746, + "grad_norm": 0.5724063515663147, + "learning_rate": 7.889977765586417e-06, + "loss": 0.0721588134765625, + "step": 9899 + }, + { + "epoch": 0.6691902122482087, + "grad_norm": 0.5089489817619324, + "learning_rate": 7.887072621974892e-06, + "loss": 0.08966827392578125, + "step": 9900 + }, + { + "epoch": 0.6692578072191429, + "grad_norm": 0.28530970215797424, + "learning_rate": 7.884167822531026e-06, + "loss": 0.04612159729003906, + "step": 9901 + }, + { + "epoch": 0.6693254021900771, + "grad_norm": 1.6389155387878418, + "learning_rate": 7.881263367395376e-06, + "loss": 0.158935546875, + "step": 9902 + }, + { + "epoch": 0.6693929971610112, + "grad_norm": 0.8421716690063477, + "learning_rate": 7.878359256708476e-06, + "loss": 0.1360626220703125, + "step": 9903 + }, + { + "epoch": 0.6694605921319454, + "grad_norm": 0.5859081149101257, + "learning_rate": 7.875455490610837e-06, + "loss": 0.09504318237304688, + "step": 9904 + }, + { + "epoch": 0.6695281871028795, + "grad_norm": 0.20371857285499573, + "learning_rate": 7.872552069242982e-06, + "loss": 0.03163909912109375, + "step": 9905 + }, + { + "epoch": 0.6695957820738138, + "grad_norm": 0.315529465675354, + "learning_rate": 7.869648992745369e-06, + "loss": 0.0613555908203125, + "step": 9906 + }, + { + "epoch": 0.6696633770447479, + "grad_norm": 1.2392171621322632, + "learning_rate": 7.866746261258493e-06, + "loss": 0.1611175537109375, + "step": 9907 + }, + { + "epoch": 0.669730972015682, + "grad_norm": 0.864017128944397, + "learning_rate": 7.863843874922784e-06, + "loss": 0.174285888671875, + "step": 9908 + }, + { + "epoch": 0.6697985669866162, + "grad_norm": 0.5545876622200012, + "learning_rate": 7.860941833878692e-06, + "loss": 0.10590362548828125, + "step": 9909 + }, + { + "epoch": 0.6698661619575503, + "grad_norm": 0.8106277585029602, + "learning_rate": 7.858040138266637e-06, + "loss": 0.136871337890625, + "step": 9910 + }, + { + "epoch": 0.6699337569284846, + "grad_norm": 0.39067408442497253, + "learning_rate": 7.855138788227003e-06, + "loss": 0.044124603271484375, + "step": 9911 + }, + { + "epoch": 0.6700013518994187, + "grad_norm": 0.3795899748802185, + "learning_rate": 7.852237783900194e-06, + "loss": 0.043060302734375, + "step": 9912 + }, + { + "epoch": 0.6700689468703529, + "grad_norm": 0.4862006902694702, + "learning_rate": 7.849337125426575e-06, + "loss": 0.0897979736328125, + "step": 9913 + }, + { + "epoch": 0.670136541841287, + "grad_norm": 0.3028227388858795, + "learning_rate": 7.846436812946493e-06, + "loss": 0.044071197509765625, + "step": 9914 + }, + { + "epoch": 0.6702041368122211, + "grad_norm": 0.9768158197402954, + "learning_rate": 7.843536846600287e-06, + "loss": 0.1771240234375, + "step": 9915 + }, + { + "epoch": 0.6702717317831554, + "grad_norm": 1.0540176630020142, + "learning_rate": 7.840637226528273e-06, + "loss": 0.1815643310546875, + "step": 9916 + }, + { + "epoch": 0.6703393267540895, + "grad_norm": 0.8449085354804993, + "learning_rate": 7.837737952870748e-06, + "loss": 0.12974929809570312, + "step": 9917 + }, + { + "epoch": 0.6704069217250237, + "grad_norm": 0.6776013970375061, + "learning_rate": 7.834839025768014e-06, + "loss": 0.1390838623046875, + "step": 9918 + }, + { + "epoch": 0.6704745166959578, + "grad_norm": 1.0755431652069092, + "learning_rate": 7.831940445360316e-06, + "loss": 0.172119140625, + "step": 9919 + }, + { + "epoch": 0.670542111666892, + "grad_norm": 0.6454526782035828, + "learning_rate": 7.829042211787923e-06, + "loss": 0.1363983154296875, + "step": 9920 + }, + { + "epoch": 0.6706097066378262, + "grad_norm": 0.3219689130783081, + "learning_rate": 7.826144325191063e-06, + "loss": 0.06330108642578125, + "step": 9921 + }, + { + "epoch": 0.6706773016087603, + "grad_norm": 0.5973139405250549, + "learning_rate": 7.823246785709955e-06, + "loss": 0.0960693359375, + "step": 9922 + }, + { + "epoch": 0.6707448965796945, + "grad_norm": 0.44542089104652405, + "learning_rate": 7.820349593484806e-06, + "loss": 0.07666015625, + "step": 9923 + }, + { + "epoch": 0.6708124915506286, + "grad_norm": 0.8380473256111145, + "learning_rate": 7.817452748655779e-06, + "loss": 0.162628173828125, + "step": 9924 + }, + { + "epoch": 0.6708800865215628, + "grad_norm": 0.33948633074760437, + "learning_rate": 7.814556251363062e-06, + "loss": 0.06479644775390625, + "step": 9925 + }, + { + "epoch": 0.670947681492497, + "grad_norm": 0.345112681388855, + "learning_rate": 7.811660101746803e-06, + "loss": 0.063568115234375, + "step": 9926 + }, + { + "epoch": 0.6710152764634312, + "grad_norm": 0.47076842188835144, + "learning_rate": 7.808764299947128e-06, + "loss": 0.0724029541015625, + "step": 9927 + }, + { + "epoch": 0.6710828714343653, + "grad_norm": 0.6003498435020447, + "learning_rate": 7.805868846104159e-06, + "loss": 0.09072113037109375, + "step": 9928 + }, + { + "epoch": 0.6711504664052994, + "grad_norm": 0.5881005525588989, + "learning_rate": 7.802973740357996e-06, + "loss": 0.08766555786132812, + "step": 9929 + }, + { + "epoch": 0.6712180613762336, + "grad_norm": 0.5102487802505493, + "learning_rate": 7.800078982848715e-06, + "loss": 0.09796142578125, + "step": 9930 + }, + { + "epoch": 0.6712856563471677, + "grad_norm": 0.3031611144542694, + "learning_rate": 7.797184573716401e-06, + "loss": 0.039432525634765625, + "step": 9931 + }, + { + "epoch": 0.671353251318102, + "grad_norm": 0.8290626406669617, + "learning_rate": 7.794290513101079e-06, + "loss": 0.10997581481933594, + "step": 9932 + }, + { + "epoch": 0.6714208462890361, + "grad_norm": 1.333088994026184, + "learning_rate": 7.791396801142798e-06, + "loss": 0.1662750244140625, + "step": 9933 + }, + { + "epoch": 0.6714884412599702, + "grad_norm": 0.4313846826553345, + "learning_rate": 7.788503437981577e-06, + "loss": 0.06171703338623047, + "step": 9934 + }, + { + "epoch": 0.6715560362309044, + "grad_norm": 0.31512174010276794, + "learning_rate": 7.785610423757396e-06, + "loss": 0.07776641845703125, + "step": 9935 + }, + { + "epoch": 0.6716236312018385, + "grad_norm": 0.41246914863586426, + "learning_rate": 7.782717758610254e-06, + "loss": 0.069976806640625, + "step": 9936 + }, + { + "epoch": 0.6716912261727728, + "grad_norm": 0.7299215793609619, + "learning_rate": 7.779825442680111e-06, + "loss": 0.106292724609375, + "step": 9937 + }, + { + "epoch": 0.6717588211437069, + "grad_norm": 1.666469931602478, + "learning_rate": 7.776933476106913e-06, + "loss": 0.1760101318359375, + "step": 9938 + }, + { + "epoch": 0.6718264161146411, + "grad_norm": 1.0561044216156006, + "learning_rate": 7.774041859030596e-06, + "loss": 0.161102294921875, + "step": 9939 + }, + { + "epoch": 0.6718940110855752, + "grad_norm": 0.4263424575328827, + "learning_rate": 7.771150591591071e-06, + "loss": 0.07305145263671875, + "step": 9940 + }, + { + "epoch": 0.6719616060565093, + "grad_norm": 0.4205397069454193, + "learning_rate": 7.768259673928233e-06, + "loss": 0.0637359619140625, + "step": 9941 + }, + { + "epoch": 0.6720292010274436, + "grad_norm": 0.6732243299484253, + "learning_rate": 7.76536910618197e-06, + "loss": 0.10036087036132812, + "step": 9942 + }, + { + "epoch": 0.6720967959983777, + "grad_norm": 0.28378161787986755, + "learning_rate": 7.762478888492132e-06, + "loss": 0.0411376953125, + "step": 9943 + }, + { + "epoch": 0.6721643909693119, + "grad_norm": 0.9551882743835449, + "learning_rate": 7.75958902099859e-06, + "loss": 0.163330078125, + "step": 9944 + }, + { + "epoch": 0.672231985940246, + "grad_norm": 0.6813310980796814, + "learning_rate": 7.75669950384115e-06, + "loss": 0.1287841796875, + "step": 9945 + }, + { + "epoch": 0.6722995809111803, + "grad_norm": 1.3225972652435303, + "learning_rate": 7.75381033715963e-06, + "loss": 0.1521148681640625, + "step": 9946 + }, + { + "epoch": 0.6723671758821144, + "grad_norm": 0.46360257267951965, + "learning_rate": 7.750921521093839e-06, + "loss": 0.07932281494140625, + "step": 9947 + }, + { + "epoch": 0.6724347708530485, + "grad_norm": 0.5328493118286133, + "learning_rate": 7.748033055783536e-06, + "loss": 0.077606201171875, + "step": 9948 + }, + { + "epoch": 0.6725023658239827, + "grad_norm": 0.7448934316635132, + "learning_rate": 7.745144941368501e-06, + "loss": 0.1029510498046875, + "step": 9949 + }, + { + "epoch": 0.6725699607949168, + "grad_norm": 0.49677544832229614, + "learning_rate": 7.74225717798847e-06, + "loss": 0.11175537109375, + "step": 9950 + }, + { + "epoch": 0.672637555765851, + "grad_norm": 0.3104415833950043, + "learning_rate": 7.73936976578317e-06, + "loss": 0.04948997497558594, + "step": 9951 + }, + { + "epoch": 0.6727051507367852, + "grad_norm": 0.37952369451522827, + "learning_rate": 7.736482704892315e-06, + "loss": 0.0589141845703125, + "step": 9952 + }, + { + "epoch": 0.6727727457077194, + "grad_norm": 1.2400628328323364, + "learning_rate": 7.7335959954556e-06, + "loss": 0.15348052978515625, + "step": 9953 + }, + { + "epoch": 0.6728403406786535, + "grad_norm": 0.5541436672210693, + "learning_rate": 7.730709637612698e-06, + "loss": 0.1109466552734375, + "step": 9954 + }, + { + "epoch": 0.6729079356495876, + "grad_norm": 1.017411231994629, + "learning_rate": 7.72782363150327e-06, + "loss": 0.215087890625, + "step": 9955 + }, + { + "epoch": 0.6729755306205218, + "grad_norm": 0.5552272796630859, + "learning_rate": 7.724937977266956e-06, + "loss": 0.0982666015625, + "step": 9956 + }, + { + "epoch": 0.673043125591456, + "grad_norm": 1.5105828046798706, + "learning_rate": 7.722052675043396e-06, + "loss": 0.2035369873046875, + "step": 9957 + }, + { + "epoch": 0.6731107205623902, + "grad_norm": 0.5780941843986511, + "learning_rate": 7.719167724972183e-06, + "loss": 0.0984039306640625, + "step": 9958 + }, + { + "epoch": 0.6731783155333243, + "grad_norm": 0.8137200474739075, + "learning_rate": 7.71628312719291e-06, + "loss": 0.181976318359375, + "step": 9959 + }, + { + "epoch": 0.6732459105042585, + "grad_norm": 0.4534251391887665, + "learning_rate": 7.713398881845167e-06, + "loss": 0.0825653076171875, + "step": 9960 + }, + { + "epoch": 0.6733135054751926, + "grad_norm": 0.40107113122940063, + "learning_rate": 7.71051498906849e-06, + "loss": 0.0857696533203125, + "step": 9961 + }, + { + "epoch": 0.6733811004461268, + "grad_norm": 0.3853451609611511, + "learning_rate": 7.707631449002437e-06, + "loss": 0.07402420043945312, + "step": 9962 + }, + { + "epoch": 0.673448695417061, + "grad_norm": 0.5321764945983887, + "learning_rate": 7.704748261786522e-06, + "loss": 0.06209754943847656, + "step": 9963 + }, + { + "epoch": 0.6735162903879951, + "grad_norm": 0.26348644495010376, + "learning_rate": 7.701865427560259e-06, + "loss": 0.04436492919921875, + "step": 9964 + }, + { + "epoch": 0.6735838853589293, + "grad_norm": 1.02108633518219, + "learning_rate": 7.69898294646313e-06, + "loss": 0.16039276123046875, + "step": 9965 + }, + { + "epoch": 0.6736514803298634, + "grad_norm": 0.4455641508102417, + "learning_rate": 7.696100818634613e-06, + "loss": 0.0865478515625, + "step": 9966 + }, + { + "epoch": 0.6737190753007977, + "grad_norm": 0.256501168012619, + "learning_rate": 7.693219044214154e-06, + "loss": 0.049739837646484375, + "step": 9967 + }, + { + "epoch": 0.6737866702717318, + "grad_norm": 0.9401436448097229, + "learning_rate": 7.69033762334121e-06, + "loss": 0.1481475830078125, + "step": 9968 + }, + { + "epoch": 0.6738542652426659, + "grad_norm": 0.9594399333000183, + "learning_rate": 7.687456556155184e-06, + "loss": 0.1740570068359375, + "step": 9969 + }, + { + "epoch": 0.6739218602136001, + "grad_norm": 0.38827142119407654, + "learning_rate": 7.684575842795486e-06, + "loss": 0.07259750366210938, + "step": 9970 + }, + { + "epoch": 0.6739894551845342, + "grad_norm": 0.9108757376670837, + "learning_rate": 7.681695483401502e-06, + "loss": 0.136962890625, + "step": 9971 + }, + { + "epoch": 0.6740570501554685, + "grad_norm": 0.5834417939186096, + "learning_rate": 7.678815478112597e-06, + "loss": 0.08160018920898438, + "step": 9972 + }, + { + "epoch": 0.6741246451264026, + "grad_norm": 0.39275455474853516, + "learning_rate": 7.675935827068143e-06, + "loss": 0.08950042724609375, + "step": 9973 + }, + { + "epoch": 0.6741922400973368, + "grad_norm": 0.3458172380924225, + "learning_rate": 7.673056530407446e-06, + "loss": 0.06308746337890625, + "step": 9974 + }, + { + "epoch": 0.6742598350682709, + "grad_norm": 0.4120544493198395, + "learning_rate": 7.670177588269849e-06, + "loss": 0.054656982421875, + "step": 9975 + }, + { + "epoch": 0.674327430039205, + "grad_norm": 0.3350735306739807, + "learning_rate": 7.667299000794641e-06, + "loss": 0.0626373291015625, + "step": 9976 + }, + { + "epoch": 0.6743950250101393, + "grad_norm": 0.8753618001937866, + "learning_rate": 7.664420768121112e-06, + "loss": 0.14303207397460938, + "step": 9977 + }, + { + "epoch": 0.6744626199810734, + "grad_norm": 0.9037492275238037, + "learning_rate": 7.661542890388524e-06, + "loss": 0.16595458984375, + "step": 9978 + }, + { + "epoch": 0.6745302149520076, + "grad_norm": 0.24711447954177856, + "learning_rate": 7.658665367736131e-06, + "loss": 0.04773712158203125, + "step": 9979 + }, + { + "epoch": 0.6745978099229417, + "grad_norm": 0.9414985179901123, + "learning_rate": 7.655788200303159e-06, + "loss": 0.1571044921875, + "step": 9980 + }, + { + "epoch": 0.6746654048938759, + "grad_norm": 1.4349998235702515, + "learning_rate": 7.65291138822883e-06, + "loss": 0.16260910034179688, + "step": 9981 + }, + { + "epoch": 0.6747329998648101, + "grad_norm": 0.42412206530570984, + "learning_rate": 7.650034931652338e-06, + "loss": 0.08258056640625, + "step": 9982 + }, + { + "epoch": 0.6748005948357442, + "grad_norm": 0.826946496963501, + "learning_rate": 7.647158830712857e-06, + "loss": 0.12308120727539062, + "step": 9983 + }, + { + "epoch": 0.6748681898066784, + "grad_norm": 0.731910228729248, + "learning_rate": 7.644283085549573e-06, + "loss": 0.1502685546875, + "step": 9984 + }, + { + "epoch": 0.6749357847776125, + "grad_norm": 0.6917160749435425, + "learning_rate": 7.641407696301605e-06, + "loss": 0.1204833984375, + "step": 9985 + }, + { + "epoch": 0.6750033797485467, + "grad_norm": 0.5262182950973511, + "learning_rate": 7.638532663108107e-06, + "loss": 0.08391571044921875, + "step": 9986 + }, + { + "epoch": 0.6750709747194809, + "grad_norm": 0.5180113911628723, + "learning_rate": 7.635657986108167e-06, + "loss": 0.106353759765625, + "step": 9987 + }, + { + "epoch": 0.6751385696904151, + "grad_norm": 0.36424604058265686, + "learning_rate": 7.6327836654409e-06, + "loss": 0.06324005126953125, + "step": 9988 + }, + { + "epoch": 0.6752061646613492, + "grad_norm": 0.5074849724769592, + "learning_rate": 7.629909701245376e-06, + "loss": 0.08611297607421875, + "step": 9989 + }, + { + "epoch": 0.6752737596322833, + "grad_norm": 0.92763352394104, + "learning_rate": 7.627036093660651e-06, + "loss": 0.154296875, + "step": 9990 + }, + { + "epoch": 0.6753413546032175, + "grad_norm": 0.2760682702064514, + "learning_rate": 7.6241628428257736e-06, + "loss": 0.056884765625, + "step": 9991 + }, + { + "epoch": 0.6754089495741517, + "grad_norm": 1.0570344924926758, + "learning_rate": 7.62128994887977e-06, + "loss": 0.15215301513671875, + "step": 9992 + }, + { + "epoch": 0.6754765445450859, + "grad_norm": 0.6622770428657532, + "learning_rate": 7.618417411961642e-06, + "loss": 0.10799503326416016, + "step": 9993 + }, + { + "epoch": 0.67554413951602, + "grad_norm": 0.3253067433834076, + "learning_rate": 7.615545232210388e-06, + "loss": 0.0707244873046875, + "step": 9994 + }, + { + "epoch": 0.6756117344869542, + "grad_norm": 0.7697176337242126, + "learning_rate": 7.612673409764979e-06, + "loss": 0.12923431396484375, + "step": 9995 + }, + { + "epoch": 0.6756793294578883, + "grad_norm": 0.25476884841918945, + "learning_rate": 7.609801944764367e-06, + "loss": 0.037120819091796875, + "step": 9996 + }, + { + "epoch": 0.6757469244288224, + "grad_norm": 0.7484957575798035, + "learning_rate": 7.606930837347504e-06, + "loss": 0.10691261291503906, + "step": 9997 + }, + { + "epoch": 0.6758145193997567, + "grad_norm": 0.2950274646282196, + "learning_rate": 7.604060087653295e-06, + "loss": 0.05312347412109375, + "step": 9998 + }, + { + "epoch": 0.6758821143706908, + "grad_norm": 0.3099628686904907, + "learning_rate": 7.601189695820664e-06, + "loss": 0.062530517578125, + "step": 9999 + }, + { + "epoch": 0.675949709341625, + "grad_norm": 0.28750497102737427, + "learning_rate": 7.598319661988479e-06, + "loss": 0.028728485107421875, + "step": 10000 + }, + { + "epoch": 0.6760173043125591, + "grad_norm": 0.6362106800079346, + "learning_rate": 7.595449986295623e-06, + "loss": 0.13179779052734375, + "step": 10001 + }, + { + "epoch": 0.6760848992834934, + "grad_norm": 0.3210277855396271, + "learning_rate": 7.592580668880945e-06, + "loss": 0.06880950927734375, + "step": 10002 + }, + { + "epoch": 0.6761524942544275, + "grad_norm": 0.2826496660709381, + "learning_rate": 7.589711709883281e-06, + "loss": 0.06772613525390625, + "step": 10003 + }, + { + "epoch": 0.6762200892253616, + "grad_norm": 0.25771158933639526, + "learning_rate": 7.586843109441449e-06, + "loss": 0.05599212646484375, + "step": 10004 + }, + { + "epoch": 0.6762876841962958, + "grad_norm": 0.19254781305789948, + "learning_rate": 7.583974867694251e-06, + "loss": 0.027914047241210938, + "step": 10005 + }, + { + "epoch": 0.6763552791672299, + "grad_norm": 1.0011084079742432, + "learning_rate": 7.5811069847804655e-06, + "loss": 0.21063232421875, + "step": 10006 + }, + { + "epoch": 0.6764228741381642, + "grad_norm": 0.842735767364502, + "learning_rate": 7.578239460838863e-06, + "loss": 0.1265411376953125, + "step": 10007 + }, + { + "epoch": 0.6764904691090983, + "grad_norm": 0.4227350652217865, + "learning_rate": 7.575372296008191e-06, + "loss": 0.0680999755859375, + "step": 10008 + }, + { + "epoch": 0.6765580640800325, + "grad_norm": 0.7778345346450806, + "learning_rate": 7.572505490427175e-06, + "loss": 0.133880615234375, + "step": 10009 + }, + { + "epoch": 0.6766256590509666, + "grad_norm": 0.9263967871665955, + "learning_rate": 7.569639044234545e-06, + "loss": 0.15424346923828125, + "step": 10010 + }, + { + "epoch": 0.6766932540219007, + "grad_norm": 0.8142989277839661, + "learning_rate": 7.566772957568977e-06, + "loss": 0.1337432861328125, + "step": 10011 + }, + { + "epoch": 0.676760848992835, + "grad_norm": 0.6743316650390625, + "learning_rate": 7.563907230569166e-06, + "loss": 0.1194305419921875, + "step": 10012 + }, + { + "epoch": 0.6768284439637691, + "grad_norm": 0.7376366257667542, + "learning_rate": 7.561041863373764e-06, + "loss": 0.12760162353515625, + "step": 10013 + }, + { + "epoch": 0.6768960389347033, + "grad_norm": 0.7504464983940125, + "learning_rate": 7.558176856121423e-06, + "loss": 0.1399078369140625, + "step": 10014 + }, + { + "epoch": 0.6769636339056374, + "grad_norm": 0.39670872688293457, + "learning_rate": 7.55531220895077e-06, + "loss": 0.06984138488769531, + "step": 10015 + }, + { + "epoch": 0.6770312288765716, + "grad_norm": 1.494649052619934, + "learning_rate": 7.552447922000398e-06, + "loss": 0.205291748046875, + "step": 10016 + }, + { + "epoch": 0.6770988238475057, + "grad_norm": 1.1395816802978516, + "learning_rate": 7.54958399540892e-06, + "loss": 0.23931884765625, + "step": 10017 + }, + { + "epoch": 0.6771664188184399, + "grad_norm": 0.8878988027572632, + "learning_rate": 7.546720429314899e-06, + "loss": 0.15240478515625, + "step": 10018 + }, + { + "epoch": 0.6772340137893741, + "grad_norm": 0.23348687589168549, + "learning_rate": 7.543857223856897e-06, + "loss": 0.030631065368652344, + "step": 10019 + }, + { + "epoch": 0.6773016087603082, + "grad_norm": 0.8669266700744629, + "learning_rate": 7.5409943791734515e-06, + "loss": 0.14324188232421875, + "step": 10020 + }, + { + "epoch": 0.6773692037312424, + "grad_norm": 0.698235273361206, + "learning_rate": 7.538131895403085e-06, + "loss": 0.1353759765625, + "step": 10021 + }, + { + "epoch": 0.6774367987021765, + "grad_norm": 0.38883766531944275, + "learning_rate": 7.535269772684295e-06, + "loss": 0.050563812255859375, + "step": 10022 + }, + { + "epoch": 0.6775043936731108, + "grad_norm": 0.47600144147872925, + "learning_rate": 7.5324080111555885e-06, + "loss": 0.074462890625, + "step": 10023 + }, + { + "epoch": 0.6775719886440449, + "grad_norm": 0.34079015254974365, + "learning_rate": 7.5295466109554125e-06, + "loss": 0.06269073486328125, + "step": 10024 + }, + { + "epoch": 0.677639583614979, + "grad_norm": 0.5583035349845886, + "learning_rate": 7.526685572222233e-06, + "loss": 0.09061813354492188, + "step": 10025 + }, + { + "epoch": 0.6777071785859132, + "grad_norm": 0.18172992765903473, + "learning_rate": 7.523824895094484e-06, + "loss": 0.0353546142578125, + "step": 10026 + }, + { + "epoch": 0.6777747735568473, + "grad_norm": 0.7406035661697388, + "learning_rate": 7.520964579710579e-06, + "loss": 0.11763763427734375, + "step": 10027 + }, + { + "epoch": 0.6778423685277816, + "grad_norm": 1.2060794830322266, + "learning_rate": 7.5181046262089195e-06, + "loss": 0.1757354736328125, + "step": 10028 + }, + { + "epoch": 0.6779099634987157, + "grad_norm": 0.28050199151039124, + "learning_rate": 7.515245034727889e-06, + "loss": 0.05492401123046875, + "step": 10029 + }, + { + "epoch": 0.6779775584696499, + "grad_norm": 0.9575992822647095, + "learning_rate": 7.512385805405848e-06, + "loss": 0.16466522216796875, + "step": 10030 + }, + { + "epoch": 0.678045153440584, + "grad_norm": 0.7346720099449158, + "learning_rate": 7.509526938381148e-06, + "loss": 0.13262939453125, + "step": 10031 + }, + { + "epoch": 0.6781127484115181, + "grad_norm": 1.5931451320648193, + "learning_rate": 7.50666843379212e-06, + "loss": 0.2084808349609375, + "step": 10032 + }, + { + "epoch": 0.6781803433824524, + "grad_norm": 0.9250758290290833, + "learning_rate": 7.503810291777071e-06, + "loss": 0.184112548828125, + "step": 10033 + }, + { + "epoch": 0.6782479383533865, + "grad_norm": 0.39907267689704895, + "learning_rate": 7.500952512474298e-06, + "loss": 0.08160400390625, + "step": 10034 + }, + { + "epoch": 0.6783155333243207, + "grad_norm": 0.5511515140533447, + "learning_rate": 7.498095096022074e-06, + "loss": 0.09033966064453125, + "step": 10035 + }, + { + "epoch": 0.6783831282952548, + "grad_norm": 0.8408726453781128, + "learning_rate": 7.495238042558675e-06, + "loss": 0.11486053466796875, + "step": 10036 + }, + { + "epoch": 0.678450723266189, + "grad_norm": 1.0211777687072754, + "learning_rate": 7.492381352222318e-06, + "loss": 0.11388015747070312, + "step": 10037 + }, + { + "epoch": 0.6785183182371232, + "grad_norm": 0.7227544188499451, + "learning_rate": 7.4895250251512475e-06, + "loss": 0.1067657470703125, + "step": 10038 + }, + { + "epoch": 0.6785859132080573, + "grad_norm": 0.7323225140571594, + "learning_rate": 7.4866690614836695e-06, + "loss": 0.1211395263671875, + "step": 10039 + }, + { + "epoch": 0.6786535081789915, + "grad_norm": 0.28859826922416687, + "learning_rate": 7.483813461357756e-06, + "loss": 0.04796028137207031, + "step": 10040 + }, + { + "epoch": 0.6787211031499256, + "grad_norm": 0.2895309031009674, + "learning_rate": 7.480958224911694e-06, + "loss": 0.05512237548828125, + "step": 10041 + }, + { + "epoch": 0.6787886981208598, + "grad_norm": 0.47729620337486267, + "learning_rate": 7.478103352283634e-06, + "loss": 0.1032257080078125, + "step": 10042 + }, + { + "epoch": 0.678856293091794, + "grad_norm": 0.2488049566745758, + "learning_rate": 7.475248843611713e-06, + "loss": 0.0420074462890625, + "step": 10043 + }, + { + "epoch": 0.6789238880627282, + "grad_norm": 0.5820322632789612, + "learning_rate": 7.472394699034048e-06, + "loss": 0.114227294921875, + "step": 10044 + }, + { + "epoch": 0.6789914830336623, + "grad_norm": 0.3719213008880615, + "learning_rate": 7.469540918688741e-06, + "loss": 0.06929302215576172, + "step": 10045 + }, + { + "epoch": 0.6790590780045964, + "grad_norm": 0.20116841793060303, + "learning_rate": 7.466687502713876e-06, + "loss": 0.029911041259765625, + "step": 10046 + }, + { + "epoch": 0.6791266729755306, + "grad_norm": 1.2619340419769287, + "learning_rate": 7.463834451247519e-06, + "loss": 0.14456939697265625, + "step": 10047 + }, + { + "epoch": 0.6791942679464648, + "grad_norm": 0.8344956040382385, + "learning_rate": 7.460981764427712e-06, + "loss": 0.110015869140625, + "step": 10048 + }, + { + "epoch": 0.679261862917399, + "grad_norm": 0.2903244197368622, + "learning_rate": 7.4581294423925046e-06, + "loss": 0.0502166748046875, + "step": 10049 + }, + { + "epoch": 0.6793294578883331, + "grad_norm": 0.21162579953670502, + "learning_rate": 7.4552774852798885e-06, + "loss": 0.04143524169921875, + "step": 10050 + }, + { + "epoch": 0.6793970528592673, + "grad_norm": 0.39846518635749817, + "learning_rate": 7.452425893227865e-06, + "loss": 0.07073974609375, + "step": 10051 + }, + { + "epoch": 0.6794646478302014, + "grad_norm": 0.4567797780036926, + "learning_rate": 7.449574666374426e-06, + "loss": 0.09761810302734375, + "step": 10052 + }, + { + "epoch": 0.6795322428011356, + "grad_norm": 0.4031340479850769, + "learning_rate": 7.446723804857508e-06, + "loss": 0.05266571044921875, + "step": 10053 + }, + { + "epoch": 0.6795998377720698, + "grad_norm": 0.2076416015625, + "learning_rate": 7.44387330881507e-06, + "loss": 0.037136077880859375, + "step": 10054 + }, + { + "epoch": 0.6796674327430039, + "grad_norm": 0.7134944200515747, + "learning_rate": 7.4410231783850345e-06, + "loss": 0.144683837890625, + "step": 10055 + }, + { + "epoch": 0.6797350277139381, + "grad_norm": 1.3354684114456177, + "learning_rate": 7.438173413705304e-06, + "loss": 0.207916259765625, + "step": 10056 + }, + { + "epoch": 0.6798026226848722, + "grad_norm": 1.1575521230697632, + "learning_rate": 7.435324014913774e-06, + "loss": 0.19205093383789062, + "step": 10057 + }, + { + "epoch": 0.6798702176558064, + "grad_norm": 0.3883650600910187, + "learning_rate": 7.432474982148309e-06, + "loss": 0.0691680908203125, + "step": 10058 + }, + { + "epoch": 0.6799378126267406, + "grad_norm": 0.3704533874988556, + "learning_rate": 7.429626315546762e-06, + "loss": 0.071746826171875, + "step": 10059 + }, + { + "epoch": 0.6800054075976747, + "grad_norm": 0.45551925897598267, + "learning_rate": 7.426778015246984e-06, + "loss": 0.0756072998046875, + "step": 10060 + }, + { + "epoch": 0.6800730025686089, + "grad_norm": 0.6581755876541138, + "learning_rate": 7.4239300813867725e-06, + "loss": 0.1378631591796875, + "step": 10061 + }, + { + "epoch": 0.680140597539543, + "grad_norm": 0.26337578892707825, + "learning_rate": 7.42108251410395e-06, + "loss": 0.036602020263671875, + "step": 10062 + }, + { + "epoch": 0.6802081925104773, + "grad_norm": 0.5091298818588257, + "learning_rate": 7.418235313536282e-06, + "loss": 0.11124420166015625, + "step": 10063 + }, + { + "epoch": 0.6802757874814114, + "grad_norm": 1.0790324211120605, + "learning_rate": 7.415388479821534e-06, + "loss": 0.156494140625, + "step": 10064 + }, + { + "epoch": 0.6803433824523455, + "grad_norm": 0.5225908756256104, + "learning_rate": 7.412542013097473e-06, + "loss": 0.12287139892578125, + "step": 10065 + }, + { + "epoch": 0.6804109774232797, + "grad_norm": 0.3515343964099884, + "learning_rate": 7.409695913501801e-06, + "loss": 0.063079833984375, + "step": 10066 + }, + { + "epoch": 0.6804785723942138, + "grad_norm": 1.1395515203475952, + "learning_rate": 7.406850181172253e-06, + "loss": 0.250396728515625, + "step": 10067 + }, + { + "epoch": 0.6805461673651481, + "grad_norm": 0.9736652374267578, + "learning_rate": 7.404004816246512e-06, + "loss": 0.16327667236328125, + "step": 10068 + }, + { + "epoch": 0.6806137623360822, + "grad_norm": 0.27342313528060913, + "learning_rate": 7.4011598188622585e-06, + "loss": 0.034069061279296875, + "step": 10069 + }, + { + "epoch": 0.6806813573070164, + "grad_norm": 0.844028651714325, + "learning_rate": 7.398315189157151e-06, + "loss": 0.1244659423828125, + "step": 10070 + }, + { + "epoch": 0.6807489522779505, + "grad_norm": 0.8142969608306885, + "learning_rate": 7.395470927268826e-06, + "loss": 0.11383819580078125, + "step": 10071 + }, + { + "epoch": 0.6808165472488846, + "grad_norm": 0.4557226598262787, + "learning_rate": 7.392627033334905e-06, + "loss": 0.09015655517578125, + "step": 10072 + }, + { + "epoch": 0.6808841422198189, + "grad_norm": 0.5014898777008057, + "learning_rate": 7.389783507493009e-06, + "loss": 0.06878662109375, + "step": 10073 + }, + { + "epoch": 0.680951737190753, + "grad_norm": 0.5901910066604614, + "learning_rate": 7.38694034988071e-06, + "loss": 0.06015777587890625, + "step": 10074 + }, + { + "epoch": 0.6810193321616872, + "grad_norm": 1.0109821557998657, + "learning_rate": 7.384097560635575e-06, + "loss": 0.1576995849609375, + "step": 10075 + }, + { + "epoch": 0.6810869271326213, + "grad_norm": 0.2438887357711792, + "learning_rate": 7.381255139895174e-06, + "loss": 0.024351119995117188, + "step": 10076 + }, + { + "epoch": 0.6811545221035555, + "grad_norm": 0.4989915192127228, + "learning_rate": 7.378413087797019e-06, + "loss": 0.11456298828125, + "step": 10077 + }, + { + "epoch": 0.6812221170744897, + "grad_norm": 0.4947536587715149, + "learning_rate": 7.375571404478648e-06, + "loss": 0.1119537353515625, + "step": 10078 + }, + { + "epoch": 0.6812897120454238, + "grad_norm": 0.6618500351905823, + "learning_rate": 7.3727300900775385e-06, + "loss": 0.15802001953125, + "step": 10079 + }, + { + "epoch": 0.681357307016358, + "grad_norm": 0.3459532558917999, + "learning_rate": 7.369889144731185e-06, + "loss": 0.07059860229492188, + "step": 10080 + }, + { + "epoch": 0.6814249019872921, + "grad_norm": 0.7026773691177368, + "learning_rate": 7.367048568577045e-06, + "loss": 0.125274658203125, + "step": 10081 + }, + { + "epoch": 0.6814924969582263, + "grad_norm": 0.24871975183486938, + "learning_rate": 7.364208361752565e-06, + "loss": 0.0396728515625, + "step": 10082 + }, + { + "epoch": 0.6815600919291604, + "grad_norm": 0.8116351962089539, + "learning_rate": 7.361368524395171e-06, + "loss": 0.111297607421875, + "step": 10083 + }, + { + "epoch": 0.6816276869000947, + "grad_norm": 0.24203963577747345, + "learning_rate": 7.3585290566422715e-06, + "loss": 0.057188987731933594, + "step": 10084 + }, + { + "epoch": 0.6816952818710288, + "grad_norm": 0.46231356263160706, + "learning_rate": 7.355689958631259e-06, + "loss": 0.098297119140625, + "step": 10085 + }, + { + "epoch": 0.6817628768419629, + "grad_norm": 0.3355720341205597, + "learning_rate": 7.3528512304995045e-06, + "loss": 0.07703399658203125, + "step": 10086 + }, + { + "epoch": 0.6818304718128971, + "grad_norm": 1.0119128227233887, + "learning_rate": 7.350012872384365e-06, + "loss": 0.197479248046875, + "step": 10087 + }, + { + "epoch": 0.6818980667838312, + "grad_norm": 0.5100365281105042, + "learning_rate": 7.347174884423173e-06, + "loss": 0.09832000732421875, + "step": 10088 + }, + { + "epoch": 0.6819656617547655, + "grad_norm": 0.4216068387031555, + "learning_rate": 7.344337266753263e-06, + "loss": 0.08447265625, + "step": 10089 + }, + { + "epoch": 0.6820332567256996, + "grad_norm": 0.3490038812160492, + "learning_rate": 7.341500019511915e-06, + "loss": 0.0522308349609375, + "step": 10090 + }, + { + "epoch": 0.6821008516966338, + "grad_norm": 0.4419478178024292, + "learning_rate": 7.338663142836435e-06, + "loss": 0.0944976806640625, + "step": 10091 + }, + { + "epoch": 0.6821684466675679, + "grad_norm": 0.7709576487541199, + "learning_rate": 7.335826636864065e-06, + "loss": 0.1763916015625, + "step": 10092 + }, + { + "epoch": 0.682236041638502, + "grad_norm": 0.35479119420051575, + "learning_rate": 7.332990501732071e-06, + "loss": 0.04775238037109375, + "step": 10093 + }, + { + "epoch": 0.6823036366094363, + "grad_norm": 0.8989418148994446, + "learning_rate": 7.330154737577678e-06, + "loss": 0.118377685546875, + "step": 10094 + }, + { + "epoch": 0.6823712315803704, + "grad_norm": 0.5191197991371155, + "learning_rate": 7.327319344538098e-06, + "loss": 0.104583740234375, + "step": 10095 + }, + { + "epoch": 0.6824388265513046, + "grad_norm": 0.5671801567077637, + "learning_rate": 7.3244843227505225e-06, + "loss": 0.0880279541015625, + "step": 10096 + }, + { + "epoch": 0.6825064215222387, + "grad_norm": 1.6327184438705444, + "learning_rate": 7.32164967235213e-06, + "loss": 0.203369140625, + "step": 10097 + }, + { + "epoch": 0.682574016493173, + "grad_norm": 0.4051584005355835, + "learning_rate": 7.318815393480076e-06, + "loss": 0.08233261108398438, + "step": 10098 + }, + { + "epoch": 0.6826416114641071, + "grad_norm": 0.342202752828598, + "learning_rate": 7.315981486271504e-06, + "loss": 0.056182861328125, + "step": 10099 + }, + { + "epoch": 0.6827092064350412, + "grad_norm": 0.32180097699165344, + "learning_rate": 7.313147950863533e-06, + "loss": 0.029811859130859375, + "step": 10100 + }, + { + "epoch": 0.6827768014059754, + "grad_norm": 1.0068494081497192, + "learning_rate": 7.310314787393263e-06, + "loss": 0.1304302215576172, + "step": 10101 + }, + { + "epoch": 0.6828443963769095, + "grad_norm": 0.2766839563846588, + "learning_rate": 7.3074819959977954e-06, + "loss": 0.044891357421875, + "step": 10102 + }, + { + "epoch": 0.6829119913478437, + "grad_norm": 0.7882079482078552, + "learning_rate": 7.304649576814178e-06, + "loss": 0.103546142578125, + "step": 10103 + }, + { + "epoch": 0.6829795863187779, + "grad_norm": 0.8847792148590088, + "learning_rate": 7.3018175299794755e-06, + "loss": 0.1668701171875, + "step": 10104 + }, + { + "epoch": 0.6830471812897121, + "grad_norm": 0.6864927411079407, + "learning_rate": 7.298985855630717e-06, + "loss": 0.06488800048828125, + "step": 10105 + }, + { + "epoch": 0.6831147762606462, + "grad_norm": 0.6489642262458801, + "learning_rate": 7.296154553904914e-06, + "loss": 0.11663055419921875, + "step": 10106 + }, + { + "epoch": 0.6831823712315803, + "grad_norm": 1.3590972423553467, + "learning_rate": 7.293323624939063e-06, + "loss": 0.2144775390625, + "step": 10107 + }, + { + "epoch": 0.6832499662025145, + "grad_norm": 0.2529042959213257, + "learning_rate": 7.290493068870143e-06, + "loss": 0.04557037353515625, + "step": 10108 + }, + { + "epoch": 0.6833175611734487, + "grad_norm": 0.5097371339797974, + "learning_rate": 7.287662885835112e-06, + "loss": 0.07843399047851562, + "step": 10109 + }, + { + "epoch": 0.6833851561443829, + "grad_norm": 0.4863324463367462, + "learning_rate": 7.284833075970915e-06, + "loss": 0.07522201538085938, + "step": 10110 + }, + { + "epoch": 0.683452751115317, + "grad_norm": 0.8069348931312561, + "learning_rate": 7.282003639414472e-06, + "loss": 0.15228271484375, + "step": 10111 + }, + { + "epoch": 0.6835203460862512, + "grad_norm": 0.3583773970603943, + "learning_rate": 7.279174576302692e-06, + "loss": 0.05666160583496094, + "step": 10112 + }, + { + "epoch": 0.6835879410571853, + "grad_norm": 1.123583436012268, + "learning_rate": 7.27634588677246e-06, + "loss": 0.174713134765625, + "step": 10113 + }, + { + "epoch": 0.6836555360281195, + "grad_norm": 0.35870227217674255, + "learning_rate": 7.273517570960644e-06, + "loss": 0.046054840087890625, + "step": 10114 + }, + { + "epoch": 0.6837231309990537, + "grad_norm": 0.5903704166412354, + "learning_rate": 7.2706896290041065e-06, + "loss": 0.1225128173828125, + "step": 10115 + }, + { + "epoch": 0.6837907259699878, + "grad_norm": 0.5002287030220032, + "learning_rate": 7.267862061039664e-06, + "loss": 0.08992767333984375, + "step": 10116 + }, + { + "epoch": 0.683858320940922, + "grad_norm": 1.0934884548187256, + "learning_rate": 7.265034867204144e-06, + "loss": 0.19439697265625, + "step": 10117 + }, + { + "epoch": 0.6839259159118561, + "grad_norm": 0.4120820164680481, + "learning_rate": 7.262208047634342e-06, + "loss": 0.064544677734375, + "step": 10118 + }, + { + "epoch": 0.6839935108827904, + "grad_norm": 0.44472214579582214, + "learning_rate": 7.259381602467034e-06, + "loss": 0.090576171875, + "step": 10119 + }, + { + "epoch": 0.6840611058537245, + "grad_norm": 0.6813384294509888, + "learning_rate": 7.256555531838983e-06, + "loss": 0.1091766357421875, + "step": 10120 + }, + { + "epoch": 0.6841287008246586, + "grad_norm": 0.8509536981582642, + "learning_rate": 7.253729835886931e-06, + "loss": 0.14204788208007812, + "step": 10121 + }, + { + "epoch": 0.6841962957955928, + "grad_norm": 0.4540179371833801, + "learning_rate": 7.250904514747604e-06, + "loss": 0.087127685546875, + "step": 10122 + }, + { + "epoch": 0.6842638907665269, + "grad_norm": 0.8982973694801331, + "learning_rate": 7.248079568557709e-06, + "loss": 0.1423797607421875, + "step": 10123 + }, + { + "epoch": 0.6843314857374612, + "grad_norm": 1.0733156204223633, + "learning_rate": 7.245254997453931e-06, + "loss": 0.13649749755859375, + "step": 10124 + }, + { + "epoch": 0.6843990807083953, + "grad_norm": 1.3561657667160034, + "learning_rate": 7.242430801572944e-06, + "loss": 0.20050048828125, + "step": 10125 + }, + { + "epoch": 0.6844666756793295, + "grad_norm": 0.3545820116996765, + "learning_rate": 7.239606981051396e-06, + "loss": 0.05495643615722656, + "step": 10126 + }, + { + "epoch": 0.6845342706502636, + "grad_norm": 0.7302165627479553, + "learning_rate": 7.23678353602592e-06, + "loss": 0.13127517700195312, + "step": 10127 + }, + { + "epoch": 0.6846018656211977, + "grad_norm": 0.59772789478302, + "learning_rate": 7.233960466633146e-06, + "loss": 0.101776123046875, + "step": 10128 + }, + { + "epoch": 0.684669460592132, + "grad_norm": 1.0808212757110596, + "learning_rate": 7.231137773009651e-06, + "loss": 0.17118072509765625, + "step": 10129 + }, + { + "epoch": 0.6847370555630661, + "grad_norm": 0.9208992123603821, + "learning_rate": 7.228315455292028e-06, + "loss": 0.20404052734375, + "step": 10130 + }, + { + "epoch": 0.6848046505340003, + "grad_norm": 0.46227240562438965, + "learning_rate": 7.225493513616838e-06, + "loss": 0.061389923095703125, + "step": 10131 + }, + { + "epoch": 0.6848722455049344, + "grad_norm": 0.3147408366203308, + "learning_rate": 7.222671948120619e-06, + "loss": 0.0483856201171875, + "step": 10132 + }, + { + "epoch": 0.6849398404758686, + "grad_norm": 0.2760336697101593, + "learning_rate": 7.219850758939898e-06, + "loss": 0.052276611328125, + "step": 10133 + }, + { + "epoch": 0.6850074354468028, + "grad_norm": 1.5544724464416504, + "learning_rate": 7.21702994621118e-06, + "loss": 0.252410888671875, + "step": 10134 + }, + { + "epoch": 0.6850750304177369, + "grad_norm": 0.2505497932434082, + "learning_rate": 7.214209510070957e-06, + "loss": 0.027914047241210938, + "step": 10135 + }, + { + "epoch": 0.6851426253886711, + "grad_norm": 0.6545432209968567, + "learning_rate": 7.211389450655698e-06, + "loss": 0.08160781860351562, + "step": 10136 + }, + { + "epoch": 0.6852102203596052, + "grad_norm": 0.27545347809791565, + "learning_rate": 7.208569768101853e-06, + "loss": 0.05661773681640625, + "step": 10137 + }, + { + "epoch": 0.6852778153305394, + "grad_norm": 0.5436789989471436, + "learning_rate": 7.205750462545856e-06, + "loss": 0.101776123046875, + "step": 10138 + }, + { + "epoch": 0.6853454103014736, + "grad_norm": 0.3245568573474884, + "learning_rate": 7.202931534124126e-06, + "loss": 0.048519134521484375, + "step": 10139 + }, + { + "epoch": 0.6854130052724078, + "grad_norm": 0.6675507426261902, + "learning_rate": 7.200112982973051e-06, + "loss": 0.1199188232421875, + "step": 10140 + }, + { + "epoch": 0.6854806002433419, + "grad_norm": 0.5577110052108765, + "learning_rate": 7.197294809229026e-06, + "loss": 0.1067352294921875, + "step": 10141 + }, + { + "epoch": 0.685548195214276, + "grad_norm": 0.9546416997909546, + "learning_rate": 7.194477013028394e-06, + "loss": 0.13094234466552734, + "step": 10142 + }, + { + "epoch": 0.6856157901852102, + "grad_norm": 0.9367544054985046, + "learning_rate": 7.191659594507511e-06, + "loss": 0.146575927734375, + "step": 10143 + }, + { + "epoch": 0.6856833851561444, + "grad_norm": 0.8389649987220764, + "learning_rate": 7.188842553802701e-06, + "loss": 0.12352371215820312, + "step": 10144 + }, + { + "epoch": 0.6857509801270786, + "grad_norm": 0.6610418558120728, + "learning_rate": 7.186025891050255e-06, + "loss": 0.10118484497070312, + "step": 10145 + }, + { + "epoch": 0.6858185750980127, + "grad_norm": 0.4047779142856598, + "learning_rate": 7.183209606386478e-06, + "loss": 0.07367277145385742, + "step": 10146 + }, + { + "epoch": 0.6858861700689469, + "grad_norm": 1.4694056510925293, + "learning_rate": 7.18039369994763e-06, + "loss": 0.1898040771484375, + "step": 10147 + }, + { + "epoch": 0.685953765039881, + "grad_norm": 1.3549482822418213, + "learning_rate": 7.177578171869967e-06, + "loss": 0.1681976318359375, + "step": 10148 + }, + { + "epoch": 0.6860213600108152, + "grad_norm": 0.5664791464805603, + "learning_rate": 7.174763022289716e-06, + "loss": 0.1096649169921875, + "step": 10149 + }, + { + "epoch": 0.6860889549817494, + "grad_norm": 0.33045732975006104, + "learning_rate": 7.171948251343096e-06, + "loss": 0.062374114990234375, + "step": 10150 + }, + { + "epoch": 0.6861565499526835, + "grad_norm": 0.612118661403656, + "learning_rate": 7.169133859166296e-06, + "loss": 0.09368133544921875, + "step": 10151 + }, + { + "epoch": 0.6862241449236177, + "grad_norm": 0.1915111541748047, + "learning_rate": 7.166319845895512e-06, + "loss": 0.01726245880126953, + "step": 10152 + }, + { + "epoch": 0.6862917398945518, + "grad_norm": 0.6162632703781128, + "learning_rate": 7.163506211666878e-06, + "loss": 0.1108551025390625, + "step": 10153 + }, + { + "epoch": 0.6863593348654861, + "grad_norm": 0.30057522654533386, + "learning_rate": 7.1606929566165605e-06, + "loss": 0.0485382080078125, + "step": 10154 + }, + { + "epoch": 0.6864269298364202, + "grad_norm": 0.3376614451408386, + "learning_rate": 7.157880080880658e-06, + "loss": 0.05350494384765625, + "step": 10155 + }, + { + "epoch": 0.6864945248073543, + "grad_norm": 1.1961055994033813, + "learning_rate": 7.1550675845952935e-06, + "loss": 0.1805267333984375, + "step": 10156 + }, + { + "epoch": 0.6865621197782885, + "grad_norm": 0.40279752016067505, + "learning_rate": 7.152255467896551e-06, + "loss": 0.07301712036132812, + "step": 10157 + }, + { + "epoch": 0.6866297147492226, + "grad_norm": 0.5408746004104614, + "learning_rate": 7.149443730920483e-06, + "loss": 0.09450531005859375, + "step": 10158 + }, + { + "epoch": 0.6866973097201569, + "grad_norm": 0.5925894379615784, + "learning_rate": 7.146632373803155e-06, + "loss": 0.1043701171875, + "step": 10159 + }, + { + "epoch": 0.686764904691091, + "grad_norm": 0.973439633846283, + "learning_rate": 7.14382139668059e-06, + "loss": 0.175750732421875, + "step": 10160 + }, + { + "epoch": 0.6868324996620252, + "grad_norm": 0.3953302502632141, + "learning_rate": 7.141010799688806e-06, + "loss": 0.080657958984375, + "step": 10161 + }, + { + "epoch": 0.6869000946329593, + "grad_norm": 0.26976117491722107, + "learning_rate": 7.138200582963792e-06, + "loss": 0.03928375244140625, + "step": 10162 + }, + { + "epoch": 0.6869676896038934, + "grad_norm": 0.29674232006073, + "learning_rate": 7.135390746641527e-06, + "loss": 0.03347015380859375, + "step": 10163 + }, + { + "epoch": 0.6870352845748277, + "grad_norm": 0.5922645926475525, + "learning_rate": 7.13258129085796e-06, + "loss": 0.11891365051269531, + "step": 10164 + }, + { + "epoch": 0.6871028795457618, + "grad_norm": 0.30306002497673035, + "learning_rate": 7.129772215749051e-06, + "loss": 0.04749488830566406, + "step": 10165 + }, + { + "epoch": 0.687170474516696, + "grad_norm": 1.163629412651062, + "learning_rate": 7.126963521450693e-06, + "loss": 0.12665557861328125, + "step": 10166 + }, + { + "epoch": 0.6872380694876301, + "grad_norm": 0.2863081097602844, + "learning_rate": 7.1241552080988086e-06, + "loss": 0.036327362060546875, + "step": 10167 + }, + { + "epoch": 0.6873056644585643, + "grad_norm": 0.7564204335212708, + "learning_rate": 7.12134727582928e-06, + "loss": 0.162841796875, + "step": 10168 + }, + { + "epoch": 0.6873732594294985, + "grad_norm": 1.0519040822982788, + "learning_rate": 7.118539724777956e-06, + "loss": 0.15885162353515625, + "step": 10169 + }, + { + "epoch": 0.6874408544004326, + "grad_norm": 0.8123661279678345, + "learning_rate": 7.115732555080705e-06, + "loss": 0.124053955078125, + "step": 10170 + }, + { + "epoch": 0.6875084493713668, + "grad_norm": 0.7357620596885681, + "learning_rate": 7.112925766873334e-06, + "loss": 0.137786865234375, + "step": 10171 + }, + { + "epoch": 0.6875760443423009, + "grad_norm": 0.45459359884262085, + "learning_rate": 7.110119360291669e-06, + "loss": 0.05828857421875, + "step": 10172 + }, + { + "epoch": 0.6876436393132351, + "grad_norm": 0.1702205240726471, + "learning_rate": 7.1073133354714944e-06, + "loss": 0.027500152587890625, + "step": 10173 + }, + { + "epoch": 0.6877112342841692, + "grad_norm": 0.4039282500743866, + "learning_rate": 7.104507692548587e-06, + "loss": 0.080291748046875, + "step": 10174 + }, + { + "epoch": 0.6877788292551035, + "grad_norm": 0.22712120413780212, + "learning_rate": 7.101702431658698e-06, + "loss": 0.036090850830078125, + "step": 10175 + }, + { + "epoch": 0.6878464242260376, + "grad_norm": 0.32819682359695435, + "learning_rate": 7.098897552937564e-06, + "loss": 0.044830322265625, + "step": 10176 + }, + { + "epoch": 0.6879140191969717, + "grad_norm": 0.7519316673278809, + "learning_rate": 7.096093056520899e-06, + "loss": 0.1113739013671875, + "step": 10177 + }, + { + "epoch": 0.6879816141679059, + "grad_norm": 0.271948903799057, + "learning_rate": 7.093288942544415e-06, + "loss": 0.07242584228515625, + "step": 10178 + }, + { + "epoch": 0.68804920913884, + "grad_norm": 1.7253862619400024, + "learning_rate": 7.090485211143777e-06, + "loss": 0.1518230438232422, + "step": 10179 + }, + { + "epoch": 0.6881168041097743, + "grad_norm": 1.6283419132232666, + "learning_rate": 7.0876818624546496e-06, + "loss": 0.225830078125, + "step": 10180 + }, + { + "epoch": 0.6881843990807084, + "grad_norm": 0.5372217297554016, + "learning_rate": 7.084878896612691e-06, + "loss": 0.0829010009765625, + "step": 10181 + }, + { + "epoch": 0.6882519940516426, + "grad_norm": 0.5362977981567383, + "learning_rate": 7.082076313753503e-06, + "loss": 0.1136932373046875, + "step": 10182 + }, + { + "epoch": 0.6883195890225767, + "grad_norm": 0.15144015848636627, + "learning_rate": 7.0792741140127165e-06, + "loss": 0.028484344482421875, + "step": 10183 + }, + { + "epoch": 0.6883871839935108, + "grad_norm": 0.5842128992080688, + "learning_rate": 7.076472297525894e-06, + "loss": 0.076507568359375, + "step": 10184 + }, + { + "epoch": 0.6884547789644451, + "grad_norm": 0.9211127758026123, + "learning_rate": 7.073670864428623e-06, + "loss": 0.10203742980957031, + "step": 10185 + }, + { + "epoch": 0.6885223739353792, + "grad_norm": 1.4451324939727783, + "learning_rate": 7.070869814856451e-06, + "loss": 0.1805419921875, + "step": 10186 + }, + { + "epoch": 0.6885899689063134, + "grad_norm": 1.0847878456115723, + "learning_rate": 7.068069148944906e-06, + "loss": 0.1446075439453125, + "step": 10187 + }, + { + "epoch": 0.6886575638772475, + "grad_norm": 1.3265717029571533, + "learning_rate": 7.065268866829505e-06, + "loss": 0.1296539306640625, + "step": 10188 + }, + { + "epoch": 0.6887251588481816, + "grad_norm": 0.3067379593849182, + "learning_rate": 7.0624689686457405e-06, + "loss": 0.054126739501953125, + "step": 10189 + }, + { + "epoch": 0.6887927538191159, + "grad_norm": 0.5026538372039795, + "learning_rate": 7.0596694545290855e-06, + "loss": 0.11163330078125, + "step": 10190 + }, + { + "epoch": 0.68886034879005, + "grad_norm": 0.8034418821334839, + "learning_rate": 7.056870324615014e-06, + "loss": 0.13668060302734375, + "step": 10191 + }, + { + "epoch": 0.6889279437609842, + "grad_norm": 0.7687631845474243, + "learning_rate": 7.0540715790389486e-06, + "loss": 0.10803508758544922, + "step": 10192 + }, + { + "epoch": 0.6889955387319183, + "grad_norm": 0.35201317071914673, + "learning_rate": 7.05127321793631e-06, + "loss": 0.0496826171875, + "step": 10193 + }, + { + "epoch": 0.6890631337028525, + "grad_norm": 0.43266457319259644, + "learning_rate": 7.048475241442516e-06, + "loss": 0.06652069091796875, + "step": 10194 + }, + { + "epoch": 0.6891307286737867, + "grad_norm": 0.4178055226802826, + "learning_rate": 7.045677649692928e-06, + "loss": 0.0952911376953125, + "step": 10195 + }, + { + "epoch": 0.6891983236447208, + "grad_norm": 0.3548545241355896, + "learning_rate": 7.0428804428229306e-06, + "loss": 0.06768035888671875, + "step": 10196 + }, + { + "epoch": 0.689265918615655, + "grad_norm": 1.0168637037277222, + "learning_rate": 7.040083620967861e-06, + "loss": 0.220062255859375, + "step": 10197 + }, + { + "epoch": 0.6893335135865891, + "grad_norm": 0.6345630288124084, + "learning_rate": 7.0372871842630465e-06, + "loss": 0.1397247314453125, + "step": 10198 + }, + { + "epoch": 0.6894011085575233, + "grad_norm": 0.26220637559890747, + "learning_rate": 7.034491132843799e-06, + "loss": 0.03510284423828125, + "step": 10199 + }, + { + "epoch": 0.6894687035284575, + "grad_norm": 0.7497926354408264, + "learning_rate": 7.031695466845408e-06, + "loss": 0.1236114501953125, + "step": 10200 + }, + { + "epoch": 0.6895362984993917, + "grad_norm": 1.078712821006775, + "learning_rate": 7.028900186403145e-06, + "loss": 0.130523681640625, + "step": 10201 + }, + { + "epoch": 0.6896038934703258, + "grad_norm": 1.7394484281539917, + "learning_rate": 7.026105291652261e-06, + "loss": 0.14070510864257812, + "step": 10202 + }, + { + "epoch": 0.6896714884412599, + "grad_norm": 0.7432118058204651, + "learning_rate": 7.023310782727993e-06, + "loss": 0.1350860595703125, + "step": 10203 + }, + { + "epoch": 0.6897390834121941, + "grad_norm": 0.3289888799190521, + "learning_rate": 7.0205166597655555e-06, + "loss": 0.0307464599609375, + "step": 10204 + }, + { + "epoch": 0.6898066783831283, + "grad_norm": 1.399703025817871, + "learning_rate": 7.017722922900146e-06, + "loss": 0.15121841430664062, + "step": 10205 + }, + { + "epoch": 0.6898742733540625, + "grad_norm": 0.3192121088504791, + "learning_rate": 7.014929572266938e-06, + "loss": 0.044403076171875, + "step": 10206 + }, + { + "epoch": 0.6899418683249966, + "grad_norm": 0.570501446723938, + "learning_rate": 7.012136608001107e-06, + "loss": 0.08832550048828125, + "step": 10207 + }, + { + "epoch": 0.6900094632959308, + "grad_norm": 0.40661340951919556, + "learning_rate": 7.00934403023777e-06, + "loss": 0.06942367553710938, + "step": 10208 + }, + { + "epoch": 0.6900770582668649, + "grad_norm": 1.1327259540557861, + "learning_rate": 7.0065518391120695e-06, + "loss": 0.15407943725585938, + "step": 10209 + }, + { + "epoch": 0.690144653237799, + "grad_norm": 0.7591407299041748, + "learning_rate": 7.0037600347591e-06, + "loss": 0.15020751953125, + "step": 10210 + }, + { + "epoch": 0.6902122482087333, + "grad_norm": 0.25120776891708374, + "learning_rate": 7.000968617313951e-06, + "loss": 0.04541015625, + "step": 10211 + }, + { + "epoch": 0.6902798431796674, + "grad_norm": 1.0822914838790894, + "learning_rate": 6.9981775869116836e-06, + "loss": 0.13449859619140625, + "step": 10212 + }, + { + "epoch": 0.6903474381506016, + "grad_norm": 0.23354867100715637, + "learning_rate": 6.9953869436873475e-06, + "loss": 0.0409088134765625, + "step": 10213 + }, + { + "epoch": 0.6904150331215357, + "grad_norm": 0.7974798083305359, + "learning_rate": 6.99259668777597e-06, + "loss": 0.142669677734375, + "step": 10214 + }, + { + "epoch": 0.69048262809247, + "grad_norm": 0.2816285490989685, + "learning_rate": 6.989806819312564e-06, + "loss": 0.03952789306640625, + "step": 10215 + }, + { + "epoch": 0.6905502230634041, + "grad_norm": 0.19862981140613556, + "learning_rate": 6.987017338432118e-06, + "loss": 0.025959014892578125, + "step": 10216 + }, + { + "epoch": 0.6906178180343382, + "grad_norm": 0.6766272783279419, + "learning_rate": 6.984228245269606e-06, + "loss": 0.08068084716796875, + "step": 10217 + }, + { + "epoch": 0.6906854130052724, + "grad_norm": 0.2632148861885071, + "learning_rate": 6.9814395399599816e-06, + "loss": 0.03304290771484375, + "step": 10218 + }, + { + "epoch": 0.6907530079762065, + "grad_norm": 0.9817792773246765, + "learning_rate": 6.9786512226381725e-06, + "loss": 0.171173095703125, + "step": 10219 + }, + { + "epoch": 0.6908206029471408, + "grad_norm": 0.6008085012435913, + "learning_rate": 6.975863293439115e-06, + "loss": 0.128570556640625, + "step": 10220 + }, + { + "epoch": 0.6908881979180749, + "grad_norm": 0.2610348165035248, + "learning_rate": 6.97307575249768e-06, + "loss": 0.050830841064453125, + "step": 10221 + }, + { + "epoch": 0.6909557928890091, + "grad_norm": 0.6919435858726501, + "learning_rate": 6.970288599948762e-06, + "loss": 0.1237030029296875, + "step": 10222 + }, + { + "epoch": 0.6910233878599432, + "grad_norm": 0.6999955773353577, + "learning_rate": 6.9675018359272205e-06, + "loss": 0.0989227294921875, + "step": 10223 + }, + { + "epoch": 0.6910909828308773, + "grad_norm": 0.8100818991661072, + "learning_rate": 6.964715460567895e-06, + "loss": 0.137237548828125, + "step": 10224 + }, + { + "epoch": 0.6911585778018116, + "grad_norm": 1.0704853534698486, + "learning_rate": 6.961929474005605e-06, + "loss": 0.18646240234375, + "step": 10225 + }, + { + "epoch": 0.6912261727727457, + "grad_norm": 0.4012845456600189, + "learning_rate": 6.9591438763751555e-06, + "loss": 0.07242584228515625, + "step": 10226 + }, + { + "epoch": 0.6912937677436799, + "grad_norm": 0.609947681427002, + "learning_rate": 6.95635866781133e-06, + "loss": 0.07687759399414062, + "step": 10227 + }, + { + "epoch": 0.691361362714614, + "grad_norm": 1.248740553855896, + "learning_rate": 6.953573848448895e-06, + "loss": 0.18145751953125, + "step": 10228 + }, + { + "epoch": 0.6914289576855482, + "grad_norm": 0.3149305582046509, + "learning_rate": 6.950789418422598e-06, + "loss": 0.062252044677734375, + "step": 10229 + }, + { + "epoch": 0.6914965526564824, + "grad_norm": 1.4572995901107788, + "learning_rate": 6.948005377867166e-06, + "loss": 0.11837005615234375, + "step": 10230 + }, + { + "epoch": 0.6915641476274165, + "grad_norm": 0.4359709322452545, + "learning_rate": 6.94522172691731e-06, + "loss": 0.07372665405273438, + "step": 10231 + }, + { + "epoch": 0.6916317425983507, + "grad_norm": 0.38534265756607056, + "learning_rate": 6.942438465707712e-06, + "loss": 0.0574798583984375, + "step": 10232 + }, + { + "epoch": 0.6916993375692848, + "grad_norm": 0.9091818928718567, + "learning_rate": 6.939655594373061e-06, + "loss": 0.1128082275390625, + "step": 10233 + }, + { + "epoch": 0.691766932540219, + "grad_norm": 1.2078051567077637, + "learning_rate": 6.93687311304799e-06, + "loss": 0.15145111083984375, + "step": 10234 + }, + { + "epoch": 0.6918345275111532, + "grad_norm": 1.7687305212020874, + "learning_rate": 6.934091021867146e-06, + "loss": 0.261016845703125, + "step": 10235 + }, + { + "epoch": 0.6919021224820874, + "grad_norm": 0.6497203707695007, + "learning_rate": 6.93130932096514e-06, + "loss": 0.1218414306640625, + "step": 10236 + }, + { + "epoch": 0.6919697174530215, + "grad_norm": 0.537194013595581, + "learning_rate": 6.928528010476568e-06, + "loss": 0.0967559814453125, + "step": 10237 + }, + { + "epoch": 0.6920373124239556, + "grad_norm": 0.8207828998565674, + "learning_rate": 6.9257470905360075e-06, + "loss": 0.167877197265625, + "step": 10238 + }, + { + "epoch": 0.6921049073948898, + "grad_norm": 1.3806267976760864, + "learning_rate": 6.922966561278014e-06, + "loss": 0.13420963287353516, + "step": 10239 + }, + { + "epoch": 0.692172502365824, + "grad_norm": 0.2955687940120697, + "learning_rate": 6.92018642283713e-06, + "loss": 0.03301239013671875, + "step": 10240 + }, + { + "epoch": 0.6922400973367582, + "grad_norm": 0.43785202503204346, + "learning_rate": 6.9174066753478745e-06, + "loss": 0.0821075439453125, + "step": 10241 + }, + { + "epoch": 0.6923076923076923, + "grad_norm": 0.24370238184928894, + "learning_rate": 6.91462731894475e-06, + "loss": 0.04489898681640625, + "step": 10242 + }, + { + "epoch": 0.6923752872786265, + "grad_norm": 0.34598153829574585, + "learning_rate": 6.911848353762231e-06, + "loss": 0.05252838134765625, + "step": 10243 + }, + { + "epoch": 0.6924428822495606, + "grad_norm": 0.4624224305152893, + "learning_rate": 6.909069779934801e-06, + "loss": 0.0791473388671875, + "step": 10244 + }, + { + "epoch": 0.6925104772204947, + "grad_norm": 0.9355909824371338, + "learning_rate": 6.90629159759688e-06, + "loss": 0.154693603515625, + "step": 10245 + }, + { + "epoch": 0.692578072191429, + "grad_norm": 0.9078533053398132, + "learning_rate": 6.903513806882916e-06, + "loss": 0.18621826171875, + "step": 10246 + }, + { + "epoch": 0.6926456671623631, + "grad_norm": 0.3520786762237549, + "learning_rate": 6.9007364079272956e-06, + "loss": 0.05678558349609375, + "step": 10247 + }, + { + "epoch": 0.6927132621332973, + "grad_norm": 0.7026455402374268, + "learning_rate": 6.897959400864423e-06, + "loss": 0.1643524169921875, + "step": 10248 + }, + { + "epoch": 0.6927808571042314, + "grad_norm": 0.5889096260070801, + "learning_rate": 6.895182785828664e-06, + "loss": 0.08629608154296875, + "step": 10249 + }, + { + "epoch": 0.6928484520751657, + "grad_norm": 0.3470822870731354, + "learning_rate": 6.892406562954356e-06, + "loss": 0.0740203857421875, + "step": 10250 + }, + { + "epoch": 0.6929160470460998, + "grad_norm": 0.560433566570282, + "learning_rate": 6.889630732375844e-06, + "loss": 0.1165313720703125, + "step": 10251 + }, + { + "epoch": 0.6929836420170339, + "grad_norm": 0.730903148651123, + "learning_rate": 6.886855294227435e-06, + "loss": 0.1356201171875, + "step": 10252 + }, + { + "epoch": 0.6930512369879681, + "grad_norm": 0.4975292980670929, + "learning_rate": 6.884080248643422e-06, + "loss": 0.08838272094726562, + "step": 10253 + }, + { + "epoch": 0.6931188319589022, + "grad_norm": 0.19598059356212616, + "learning_rate": 6.881305595758077e-06, + "loss": 0.034515380859375, + "step": 10254 + }, + { + "epoch": 0.6931864269298365, + "grad_norm": 0.6733649969100952, + "learning_rate": 6.878531335705659e-06, + "loss": 0.13001251220703125, + "step": 10255 + }, + { + "epoch": 0.6932540219007706, + "grad_norm": 0.25638774037361145, + "learning_rate": 6.875757468620396e-06, + "loss": 0.026638031005859375, + "step": 10256 + }, + { + "epoch": 0.6933216168717048, + "grad_norm": 0.3155984580516815, + "learning_rate": 6.872983994636523e-06, + "loss": 0.045703887939453125, + "step": 10257 + }, + { + "epoch": 0.6933892118426389, + "grad_norm": 0.4135288596153259, + "learning_rate": 6.870210913888212e-06, + "loss": 0.053134918212890625, + "step": 10258 + }, + { + "epoch": 0.693456806813573, + "grad_norm": 0.715381383895874, + "learning_rate": 6.867438226509668e-06, + "loss": 0.190185546875, + "step": 10259 + }, + { + "epoch": 0.6935244017845072, + "grad_norm": 0.7786675691604614, + "learning_rate": 6.864665932635029e-06, + "loss": 0.186309814453125, + "step": 10260 + }, + { + "epoch": 0.6935919967554414, + "grad_norm": 0.517098605632782, + "learning_rate": 6.861894032398448e-06, + "loss": 0.0489654541015625, + "step": 10261 + }, + { + "epoch": 0.6936595917263756, + "grad_norm": 0.8599514365196228, + "learning_rate": 6.859122525934052e-06, + "loss": 0.152435302734375, + "step": 10262 + }, + { + "epoch": 0.6937271866973097, + "grad_norm": 0.9666953682899475, + "learning_rate": 6.856351413375923e-06, + "loss": 0.165435791015625, + "step": 10263 + }, + { + "epoch": 0.6937947816682439, + "grad_norm": 0.8374520540237427, + "learning_rate": 6.853580694858163e-06, + "loss": 0.132720947265625, + "step": 10264 + }, + { + "epoch": 0.693862376639178, + "grad_norm": 0.23265190422534943, + "learning_rate": 6.850810370514832e-06, + "loss": 0.037166595458984375, + "step": 10265 + }, + { + "epoch": 0.6939299716101122, + "grad_norm": 0.6271800398826599, + "learning_rate": 6.848040440479975e-06, + "loss": 0.09518051147460938, + "step": 10266 + }, + { + "epoch": 0.6939975665810464, + "grad_norm": 1.231361985206604, + "learning_rate": 6.845270904887617e-06, + "loss": 0.1690673828125, + "step": 10267 + }, + { + "epoch": 0.6940651615519805, + "grad_norm": 0.5078712105751038, + "learning_rate": 6.842501763871766e-06, + "loss": 0.0852203369140625, + "step": 10268 + }, + { + "epoch": 0.6941327565229147, + "grad_norm": 0.3632071614265442, + "learning_rate": 6.839733017566406e-06, + "loss": 0.05999565124511719, + "step": 10269 + }, + { + "epoch": 0.6942003514938488, + "grad_norm": 0.19243626296520233, + "learning_rate": 6.836964666105521e-06, + "loss": 0.021671295166015625, + "step": 10270 + }, + { + "epoch": 0.6942679464647831, + "grad_norm": 0.4403734803199768, + "learning_rate": 6.8341967096230405e-06, + "loss": 0.08933258056640625, + "step": 10271 + }, + { + "epoch": 0.6943355414357172, + "grad_norm": 0.8367125391960144, + "learning_rate": 6.831429148252911e-06, + "loss": 0.1405792236328125, + "step": 10272 + }, + { + "epoch": 0.6944031364066513, + "grad_norm": 0.4939797520637512, + "learning_rate": 6.8286619821290445e-06, + "loss": 0.0855865478515625, + "step": 10273 + }, + { + "epoch": 0.6944707313775855, + "grad_norm": 0.3664144277572632, + "learning_rate": 6.825895211385318e-06, + "loss": 0.07759857177734375, + "step": 10274 + }, + { + "epoch": 0.6945383263485196, + "grad_norm": 0.34669333696365356, + "learning_rate": 6.823128836155624e-06, + "loss": 0.06310272216796875, + "step": 10275 + }, + { + "epoch": 0.6946059213194539, + "grad_norm": 0.1556839793920517, + "learning_rate": 6.8203628565737995e-06, + "loss": 0.02497100830078125, + "step": 10276 + }, + { + "epoch": 0.694673516290388, + "grad_norm": 0.21725502610206604, + "learning_rate": 6.817597272773693e-06, + "loss": 0.022947311401367188, + "step": 10277 + }, + { + "epoch": 0.6947411112613222, + "grad_norm": 0.7643296122550964, + "learning_rate": 6.814832084889118e-06, + "loss": 0.08731460571289062, + "step": 10278 + }, + { + "epoch": 0.6948087062322563, + "grad_norm": 0.40899893641471863, + "learning_rate": 6.812067293053868e-06, + "loss": 0.0616912841796875, + "step": 10279 + }, + { + "epoch": 0.6948763012031904, + "grad_norm": 0.2937043607234955, + "learning_rate": 6.809302897401723e-06, + "loss": 0.04467010498046875, + "step": 10280 + }, + { + "epoch": 0.6949438961741247, + "grad_norm": 0.3708837926387787, + "learning_rate": 6.806538898066443e-06, + "loss": 0.0618743896484375, + "step": 10281 + }, + { + "epoch": 0.6950114911450588, + "grad_norm": 0.8141721487045288, + "learning_rate": 6.803775295181758e-06, + "loss": 0.14411163330078125, + "step": 10282 + }, + { + "epoch": 0.695079086115993, + "grad_norm": 0.47881659865379333, + "learning_rate": 6.801012088881407e-06, + "loss": 0.08023834228515625, + "step": 10283 + }, + { + "epoch": 0.6951466810869271, + "grad_norm": 0.38715365529060364, + "learning_rate": 6.798249279299076e-06, + "loss": 0.0635528564453125, + "step": 10284 + }, + { + "epoch": 0.6952142760578613, + "grad_norm": 0.9344268441200256, + "learning_rate": 6.795486866568444e-06, + "loss": 0.14754486083984375, + "step": 10285 + }, + { + "epoch": 0.6952818710287955, + "grad_norm": 1.345168113708496, + "learning_rate": 6.792724850823192e-06, + "loss": 0.16192626953125, + "step": 10286 + }, + { + "epoch": 0.6953494659997296, + "grad_norm": 0.7243432402610779, + "learning_rate": 6.789963232196943e-06, + "loss": 0.1085205078125, + "step": 10287 + }, + { + "epoch": 0.6954170609706638, + "grad_norm": 1.3208949565887451, + "learning_rate": 6.787202010823335e-06, + "loss": 0.194366455078125, + "step": 10288 + }, + { + "epoch": 0.6954846559415979, + "grad_norm": 0.4563693106174469, + "learning_rate": 6.784441186835967e-06, + "loss": 0.0774993896484375, + "step": 10289 + }, + { + "epoch": 0.6955522509125321, + "grad_norm": 0.7536649107933044, + "learning_rate": 6.7816807603684275e-06, + "loss": 0.141845703125, + "step": 10290 + }, + { + "epoch": 0.6956198458834663, + "grad_norm": 0.8674235939979553, + "learning_rate": 6.778920731554283e-06, + "loss": 0.13592529296875, + "step": 10291 + }, + { + "epoch": 0.6956874408544005, + "grad_norm": 0.9297773838043213, + "learning_rate": 6.7761611005270785e-06, + "loss": 0.10788726806640625, + "step": 10292 + }, + { + "epoch": 0.6957550358253346, + "grad_norm": 0.25383612513542175, + "learning_rate": 6.773401867420345e-06, + "loss": 0.025130033493041992, + "step": 10293 + }, + { + "epoch": 0.6958226307962687, + "grad_norm": 0.7242276072502136, + "learning_rate": 6.770643032367587e-06, + "loss": 0.10112380981445312, + "step": 10294 + }, + { + "epoch": 0.6958902257672029, + "grad_norm": 0.663227379322052, + "learning_rate": 6.767884595502294e-06, + "loss": 0.10284042358398438, + "step": 10295 + }, + { + "epoch": 0.6959578207381371, + "grad_norm": 1.4264349937438965, + "learning_rate": 6.765126556957949e-06, + "loss": 0.09784698486328125, + "step": 10296 + }, + { + "epoch": 0.6960254157090713, + "grad_norm": 0.408634752035141, + "learning_rate": 6.7623689168679895e-06, + "loss": 0.0897369384765625, + "step": 10297 + }, + { + "epoch": 0.6960930106800054, + "grad_norm": 0.9759281873703003, + "learning_rate": 6.759611675365843e-06, + "loss": 0.1624755859375, + "step": 10298 + }, + { + "epoch": 0.6961606056509396, + "grad_norm": 0.5138081312179565, + "learning_rate": 6.756854832584943e-06, + "loss": 0.0777435302734375, + "step": 10299 + }, + { + "epoch": 0.6962282006218737, + "grad_norm": 0.8186566233634949, + "learning_rate": 6.754098388658658e-06, + "loss": 0.1620025634765625, + "step": 10300 + }, + { + "epoch": 0.6962957955928079, + "grad_norm": 0.768919825553894, + "learning_rate": 6.7513423437203795e-06, + "loss": 0.134765625, + "step": 10301 + }, + { + "epoch": 0.6963633905637421, + "grad_norm": 2.9786736965179443, + "learning_rate": 6.748586697903456e-06, + "loss": 0.20556640625, + "step": 10302 + }, + { + "epoch": 0.6964309855346762, + "grad_norm": 0.404409795999527, + "learning_rate": 6.7458314513412235e-06, + "loss": 0.08526611328125, + "step": 10303 + }, + { + "epoch": 0.6964985805056104, + "grad_norm": 0.3827763497829437, + "learning_rate": 6.743076604166997e-06, + "loss": 0.078155517578125, + "step": 10304 + }, + { + "epoch": 0.6965661754765445, + "grad_norm": 1.255603313446045, + "learning_rate": 6.740322156514073e-06, + "loss": 0.21142578125, + "step": 10305 + }, + { + "epoch": 0.6966337704474788, + "grad_norm": 0.7737876772880554, + "learning_rate": 6.737568108515725e-06, + "loss": 0.172088623046875, + "step": 10306 + }, + { + "epoch": 0.6967013654184129, + "grad_norm": 0.9256815910339355, + "learning_rate": 6.7348144603052264e-06, + "loss": 0.13016510009765625, + "step": 10307 + }, + { + "epoch": 0.696768960389347, + "grad_norm": 0.4828827381134033, + "learning_rate": 6.7320612120157995e-06, + "loss": 0.07009506225585938, + "step": 10308 + }, + { + "epoch": 0.6968365553602812, + "grad_norm": 0.5169121026992798, + "learning_rate": 6.729308363780669e-06, + "loss": 0.11719512939453125, + "step": 10309 + }, + { + "epoch": 0.6969041503312153, + "grad_norm": 0.8876779675483704, + "learning_rate": 6.726555915733033e-06, + "loss": 0.1641082763671875, + "step": 10310 + }, + { + "epoch": 0.6969717453021496, + "grad_norm": 0.44656726717948914, + "learning_rate": 6.72380386800607e-06, + "loss": 0.1053466796875, + "step": 10311 + }, + { + "epoch": 0.6970393402730837, + "grad_norm": 0.2840099036693573, + "learning_rate": 6.721052220732955e-06, + "loss": 0.039707183837890625, + "step": 10312 + }, + { + "epoch": 0.6971069352440179, + "grad_norm": 0.4971062242984772, + "learning_rate": 6.71830097404681e-06, + "loss": 0.1248016357421875, + "step": 10313 + }, + { + "epoch": 0.697174530214952, + "grad_norm": 0.3734072148799896, + "learning_rate": 6.7155501280807714e-06, + "loss": 0.0597991943359375, + "step": 10314 + }, + { + "epoch": 0.6972421251858861, + "grad_norm": 0.788058876991272, + "learning_rate": 6.712799682967938e-06, + "loss": 0.138641357421875, + "step": 10315 + }, + { + "epoch": 0.6973097201568204, + "grad_norm": 0.22167080640792847, + "learning_rate": 6.710049638841394e-06, + "loss": 0.048656463623046875, + "step": 10316 + }, + { + "epoch": 0.6973773151277545, + "grad_norm": 0.5878819823265076, + "learning_rate": 6.707299995834203e-06, + "loss": 0.103363037109375, + "step": 10317 + }, + { + "epoch": 0.6974449100986887, + "grad_norm": 0.3449210226535797, + "learning_rate": 6.70455075407941e-06, + "loss": 0.068939208984375, + "step": 10318 + }, + { + "epoch": 0.6975125050696228, + "grad_norm": 1.1234502792358398, + "learning_rate": 6.701801913710042e-06, + "loss": 0.205902099609375, + "step": 10319 + }, + { + "epoch": 0.6975801000405569, + "grad_norm": 0.24419301748275757, + "learning_rate": 6.699053474859101e-06, + "loss": 0.04141998291015625, + "step": 10320 + }, + { + "epoch": 0.6976476950114912, + "grad_norm": 0.23795312643051147, + "learning_rate": 6.6963054376595785e-06, + "loss": 0.030617237091064453, + "step": 10321 + }, + { + "epoch": 0.6977152899824253, + "grad_norm": 0.45309415459632874, + "learning_rate": 6.6935578022444365e-06, + "loss": 0.1050262451171875, + "step": 10322 + }, + { + "epoch": 0.6977828849533595, + "grad_norm": 0.5182491540908813, + "learning_rate": 6.690810568746626e-06, + "loss": 0.06549072265625, + "step": 10323 + }, + { + "epoch": 0.6978504799242936, + "grad_norm": 0.7612850069999695, + "learning_rate": 6.68806373729907e-06, + "loss": 0.12212753295898438, + "step": 10324 + }, + { + "epoch": 0.6979180748952278, + "grad_norm": 0.497986376285553, + "learning_rate": 6.685317308034691e-06, + "loss": 0.10971450805664062, + "step": 10325 + }, + { + "epoch": 0.697985669866162, + "grad_norm": 0.38180142641067505, + "learning_rate": 6.682571281086359e-06, + "loss": 0.074737548828125, + "step": 10326 + }, + { + "epoch": 0.6980532648370961, + "grad_norm": 0.503581702709198, + "learning_rate": 6.679825656586959e-06, + "loss": 0.0852813720703125, + "step": 10327 + }, + { + "epoch": 0.6981208598080303, + "grad_norm": 0.596473753452301, + "learning_rate": 6.677080434669337e-06, + "loss": 0.1071014404296875, + "step": 10328 + }, + { + "epoch": 0.6981884547789644, + "grad_norm": 0.25672221183776855, + "learning_rate": 6.674335615466323e-06, + "loss": 0.0473785400390625, + "step": 10329 + }, + { + "epoch": 0.6982560497498986, + "grad_norm": 0.6072518825531006, + "learning_rate": 6.67159119911073e-06, + "loss": 0.1262969970703125, + "step": 10330 + }, + { + "epoch": 0.6983236447208327, + "grad_norm": 0.7836451530456543, + "learning_rate": 6.6688471857353465e-06, + "loss": 0.12210845947265625, + "step": 10331 + }, + { + "epoch": 0.698391239691767, + "grad_norm": 1.601320743560791, + "learning_rate": 6.666103575472947e-06, + "loss": 0.1494293212890625, + "step": 10332 + }, + { + "epoch": 0.6984588346627011, + "grad_norm": 0.26305675506591797, + "learning_rate": 6.663360368456286e-06, + "loss": 0.04257965087890625, + "step": 10333 + }, + { + "epoch": 0.6985264296336352, + "grad_norm": 0.6683781147003174, + "learning_rate": 6.660617564818095e-06, + "loss": 0.1280059814453125, + "step": 10334 + }, + { + "epoch": 0.6985940246045694, + "grad_norm": 0.542012631893158, + "learning_rate": 6.657875164691083e-06, + "loss": 0.091217041015625, + "step": 10335 + }, + { + "epoch": 0.6986616195755035, + "grad_norm": 0.5228592157363892, + "learning_rate": 6.655133168207959e-06, + "loss": 0.09923553466796875, + "step": 10336 + }, + { + "epoch": 0.6987292145464378, + "grad_norm": 0.3731929063796997, + "learning_rate": 6.65239157550138e-06, + "loss": 0.07112884521484375, + "step": 10337 + }, + { + "epoch": 0.6987968095173719, + "grad_norm": 0.6852834820747375, + "learning_rate": 6.6496503867040185e-06, + "loss": 0.145782470703125, + "step": 10338 + }, + { + "epoch": 0.6988644044883061, + "grad_norm": 0.4381016492843628, + "learning_rate": 6.646909601948492e-06, + "loss": 0.05228996276855469, + "step": 10339 + }, + { + "epoch": 0.6989319994592402, + "grad_norm": 0.7123503684997559, + "learning_rate": 6.64416922136743e-06, + "loss": 0.1598052978515625, + "step": 10340 + }, + { + "epoch": 0.6989995944301743, + "grad_norm": 0.9915164113044739, + "learning_rate": 6.6414292450934275e-06, + "loss": 0.200714111328125, + "step": 10341 + }, + { + "epoch": 0.6990671894011086, + "grad_norm": 0.7017727494239807, + "learning_rate": 6.638689673259059e-06, + "loss": 0.133941650390625, + "step": 10342 + }, + { + "epoch": 0.6991347843720427, + "grad_norm": 0.22716973721981049, + "learning_rate": 6.6359505059968815e-06, + "loss": 0.03358268737792969, + "step": 10343 + }, + { + "epoch": 0.6992023793429769, + "grad_norm": 0.5650264024734497, + "learning_rate": 6.633211743439435e-06, + "loss": 0.1016998291015625, + "step": 10344 + }, + { + "epoch": 0.699269974313911, + "grad_norm": 0.18914994597434998, + "learning_rate": 6.630473385719237e-06, + "loss": 0.035099029541015625, + "step": 10345 + }, + { + "epoch": 0.6993375692848453, + "grad_norm": 0.6678860187530518, + "learning_rate": 6.627735432968785e-06, + "loss": 0.1300201416015625, + "step": 10346 + }, + { + "epoch": 0.6994051642557794, + "grad_norm": 0.5151667594909668, + "learning_rate": 6.62499788532056e-06, + "loss": 0.11907958984375, + "step": 10347 + }, + { + "epoch": 0.6994727592267135, + "grad_norm": 0.2836448848247528, + "learning_rate": 6.622260742907016e-06, + "loss": 0.029094696044921875, + "step": 10348 + }, + { + "epoch": 0.6995403541976477, + "grad_norm": 0.8177111744880676, + "learning_rate": 6.619524005860608e-06, + "loss": 0.107269287109375, + "step": 10349 + }, + { + "epoch": 0.6996079491685818, + "grad_norm": 0.42566707730293274, + "learning_rate": 6.616787674313735e-06, + "loss": 0.07278823852539062, + "step": 10350 + }, + { + "epoch": 0.699675544139516, + "grad_norm": 0.7246875762939453, + "learning_rate": 6.61405174839882e-06, + "loss": 0.1209869384765625, + "step": 10351 + }, + { + "epoch": 0.6997431391104502, + "grad_norm": 0.928876519203186, + "learning_rate": 6.611316228248222e-06, + "loss": 0.12005615234375, + "step": 10352 + }, + { + "epoch": 0.6998107340813844, + "grad_norm": 0.5268136262893677, + "learning_rate": 6.608581113994319e-06, + "loss": 0.102020263671875, + "step": 10353 + }, + { + "epoch": 0.6998783290523185, + "grad_norm": 0.42469245195388794, + "learning_rate": 6.605846405769455e-06, + "loss": 0.07293701171875, + "step": 10354 + }, + { + "epoch": 0.6999459240232526, + "grad_norm": 1.4402813911437988, + "learning_rate": 6.603112103705932e-06, + "loss": 0.11541748046875, + "step": 10355 + }, + { + "epoch": 0.7000135189941868, + "grad_norm": 0.24992553889751434, + "learning_rate": 6.60037820793607e-06, + "loss": 0.033298492431640625, + "step": 10356 + }, + { + "epoch": 0.700081113965121, + "grad_norm": 0.8690192699432373, + "learning_rate": 6.597644718592148e-06, + "loss": 0.14862442016601562, + "step": 10357 + }, + { + "epoch": 0.7001487089360552, + "grad_norm": 0.6211632490158081, + "learning_rate": 6.594911635806428e-06, + "loss": 0.1103363037109375, + "step": 10358 + }, + { + "epoch": 0.7002163039069893, + "grad_norm": 0.5331317782402039, + "learning_rate": 6.592178959711153e-06, + "loss": 0.09323883056640625, + "step": 10359 + }, + { + "epoch": 0.7002838988779235, + "grad_norm": 0.28000614047050476, + "learning_rate": 6.5894466904385485e-06, + "loss": 0.0483551025390625, + "step": 10360 + }, + { + "epoch": 0.7003514938488576, + "grad_norm": 0.25421053171157837, + "learning_rate": 6.5867148281208114e-06, + "loss": 0.047756195068359375, + "step": 10361 + }, + { + "epoch": 0.7004190888197918, + "grad_norm": 0.6904811263084412, + "learning_rate": 6.583983372890144e-06, + "loss": 0.13233184814453125, + "step": 10362 + }, + { + "epoch": 0.700486683790726, + "grad_norm": 0.48131340742111206, + "learning_rate": 6.581252324878689e-06, + "loss": 0.07085418701171875, + "step": 10363 + }, + { + "epoch": 0.7005542787616601, + "grad_norm": 0.9705228209495544, + "learning_rate": 6.5785216842186074e-06, + "loss": 0.09151458740234375, + "step": 10364 + }, + { + "epoch": 0.7006218737325943, + "grad_norm": 0.21799378097057343, + "learning_rate": 6.575791451042019e-06, + "loss": 0.047393798828125, + "step": 10365 + }, + { + "epoch": 0.7006894687035284, + "grad_norm": 0.7213233709335327, + "learning_rate": 6.573061625481031e-06, + "loss": 0.1375732421875, + "step": 10366 + }, + { + "epoch": 0.7007570636744627, + "grad_norm": 0.36334046721458435, + "learning_rate": 6.570332207667732e-06, + "loss": 0.08043670654296875, + "step": 10367 + }, + { + "epoch": 0.7008246586453968, + "grad_norm": 0.6018199324607849, + "learning_rate": 6.567603197734174e-06, + "loss": 0.0993499755859375, + "step": 10368 + }, + { + "epoch": 0.7008922536163309, + "grad_norm": 0.3323206305503845, + "learning_rate": 6.564874595812419e-06, + "loss": 0.061901092529296875, + "step": 10369 + }, + { + "epoch": 0.7009598485872651, + "grad_norm": 0.14882351458072662, + "learning_rate": 6.562146402034489e-06, + "loss": 0.017606735229492188, + "step": 10370 + }, + { + "epoch": 0.7010274435581992, + "grad_norm": 0.32160893082618713, + "learning_rate": 6.559418616532391e-06, + "loss": 0.0508270263671875, + "step": 10371 + }, + { + "epoch": 0.7010950385291335, + "grad_norm": 0.5913649797439575, + "learning_rate": 6.556691239438111e-06, + "loss": 0.1328887939453125, + "step": 10372 + }, + { + "epoch": 0.7011626335000676, + "grad_norm": 0.7279521226882935, + "learning_rate": 6.553964270883618e-06, + "loss": 0.1598358154296875, + "step": 10373 + }, + { + "epoch": 0.7012302284710018, + "grad_norm": 0.3976545035839081, + "learning_rate": 6.5512377110008514e-06, + "loss": 0.0638885498046875, + "step": 10374 + }, + { + "epoch": 0.7012978234419359, + "grad_norm": 1.0160250663757324, + "learning_rate": 6.54851155992176e-06, + "loss": 0.170074462890625, + "step": 10375 + }, + { + "epoch": 0.70136541841287, + "grad_norm": 0.2307116836309433, + "learning_rate": 6.545785817778227e-06, + "loss": 0.0402069091796875, + "step": 10376 + }, + { + "epoch": 0.7014330133838043, + "grad_norm": 0.3225402235984802, + "learning_rate": 6.543060484702158e-06, + "loss": 0.040157318115234375, + "step": 10377 + }, + { + "epoch": 0.7015006083547384, + "grad_norm": 0.2140415757894516, + "learning_rate": 6.540335560825419e-06, + "loss": 0.03844451904296875, + "step": 10378 + }, + { + "epoch": 0.7015682033256726, + "grad_norm": 0.3832681179046631, + "learning_rate": 6.537611046279847e-06, + "loss": 0.0748291015625, + "step": 10379 + }, + { + "epoch": 0.7016357982966067, + "grad_norm": 0.7489080429077148, + "learning_rate": 6.534886941197285e-06, + "loss": 0.1413421630859375, + "step": 10380 + }, + { + "epoch": 0.7017033932675409, + "grad_norm": 1.0587806701660156, + "learning_rate": 6.532163245709535e-06, + "loss": 0.0763397216796875, + "step": 10381 + }, + { + "epoch": 0.7017709882384751, + "grad_norm": 0.794024646282196, + "learning_rate": 6.52943995994839e-06, + "loss": 0.12920761108398438, + "step": 10382 + }, + { + "epoch": 0.7018385832094092, + "grad_norm": 1.2005772590637207, + "learning_rate": 6.526717084045617e-06, + "loss": 0.176361083984375, + "step": 10383 + }, + { + "epoch": 0.7019061781803434, + "grad_norm": 1.521423101425171, + "learning_rate": 6.5239946181329675e-06, + "loss": 0.1602630615234375, + "step": 10384 + }, + { + "epoch": 0.7019737731512775, + "grad_norm": 0.5498899221420288, + "learning_rate": 6.521272562342167e-06, + "loss": 0.108642578125, + "step": 10385 + }, + { + "epoch": 0.7020413681222117, + "grad_norm": 0.8316846489906311, + "learning_rate": 6.51855091680493e-06, + "loss": 0.1839599609375, + "step": 10386 + }, + { + "epoch": 0.7021089630931459, + "grad_norm": 0.8737227916717529, + "learning_rate": 6.515829681652941e-06, + "loss": 0.161956787109375, + "step": 10387 + }, + { + "epoch": 0.7021765580640801, + "grad_norm": 0.1836978942155838, + "learning_rate": 6.513108857017886e-06, + "loss": 0.03219795227050781, + "step": 10388 + }, + { + "epoch": 0.7022441530350142, + "grad_norm": 0.20539501309394836, + "learning_rate": 6.510388443031396e-06, + "loss": 0.026363372802734375, + "step": 10389 + }, + { + "epoch": 0.7023117480059483, + "grad_norm": 0.37345290184020996, + "learning_rate": 6.507668439825106e-06, + "loss": 0.028217315673828125, + "step": 10390 + }, + { + "epoch": 0.7023793429768825, + "grad_norm": 0.27838048338890076, + "learning_rate": 6.50494884753064e-06, + "loss": 0.0517425537109375, + "step": 10391 + }, + { + "epoch": 0.7024469379478167, + "grad_norm": 0.594819188117981, + "learning_rate": 6.5022296662795675e-06, + "loss": 0.11057281494140625, + "step": 10392 + }, + { + "epoch": 0.7025145329187509, + "grad_norm": 1.5179752111434937, + "learning_rate": 6.499510896203477e-06, + "loss": 0.12685394287109375, + "step": 10393 + }, + { + "epoch": 0.702582127889685, + "grad_norm": 0.6585514545440674, + "learning_rate": 6.496792537433913e-06, + "loss": 0.1028900146484375, + "step": 10394 + }, + { + "epoch": 0.7026497228606192, + "grad_norm": 0.20945122838020325, + "learning_rate": 6.494074590102408e-06, + "loss": 0.023738861083984375, + "step": 10395 + }, + { + "epoch": 0.7027173178315533, + "grad_norm": 0.5172110199928284, + "learning_rate": 6.4913570543404705e-06, + "loss": 0.07059478759765625, + "step": 10396 + }, + { + "epoch": 0.7027849128024874, + "grad_norm": 0.8461733460426331, + "learning_rate": 6.4886399302795935e-06, + "loss": 0.14697265625, + "step": 10397 + }, + { + "epoch": 0.7028525077734217, + "grad_norm": 0.7858896255493164, + "learning_rate": 6.48592321805125e-06, + "loss": 0.136444091796875, + "step": 10398 + }, + { + "epoch": 0.7029201027443558, + "grad_norm": 0.3121233284473419, + "learning_rate": 6.483206917786887e-06, + "loss": 0.05216217041015625, + "step": 10399 + }, + { + "epoch": 0.70298769771529, + "grad_norm": 0.43393972516059875, + "learning_rate": 6.4804910296179355e-06, + "loss": 0.058383941650390625, + "step": 10400 + }, + { + "epoch": 0.7030552926862241, + "grad_norm": 1.6694650650024414, + "learning_rate": 6.47777555367582e-06, + "loss": 0.1514892578125, + "step": 10401 + }, + { + "epoch": 0.7031228876571584, + "grad_norm": 0.7485076189041138, + "learning_rate": 6.475060490091917e-06, + "loss": 0.15032958984375, + "step": 10402 + }, + { + "epoch": 0.7031904826280925, + "grad_norm": 0.5843498706817627, + "learning_rate": 6.4723458389975985e-06, + "loss": 0.110260009765625, + "step": 10403 + }, + { + "epoch": 0.7032580775990266, + "grad_norm": 0.7582786679267883, + "learning_rate": 6.469631600524231e-06, + "loss": 0.143157958984375, + "step": 10404 + }, + { + "epoch": 0.7033256725699608, + "grad_norm": 1.8464282751083374, + "learning_rate": 6.466917774803128e-06, + "loss": 0.1394805908203125, + "step": 10405 + }, + { + "epoch": 0.7033932675408949, + "grad_norm": 0.7020469307899475, + "learning_rate": 6.464204361965614e-06, + "loss": 0.1539154052734375, + "step": 10406 + }, + { + "epoch": 0.7034608625118292, + "grad_norm": 1.4744863510131836, + "learning_rate": 6.461491362142976e-06, + "loss": 0.1212005615234375, + "step": 10407 + }, + { + "epoch": 0.7035284574827633, + "grad_norm": 0.6403448581695557, + "learning_rate": 6.458778775466486e-06, + "loss": 0.154266357421875, + "step": 10408 + }, + { + "epoch": 0.7035960524536975, + "grad_norm": 0.4889504015445709, + "learning_rate": 6.456066602067396e-06, + "loss": 0.09326171875, + "step": 10409 + }, + { + "epoch": 0.7036636474246316, + "grad_norm": 0.42025667428970337, + "learning_rate": 6.453354842076939e-06, + "loss": 0.0519256591796875, + "step": 10410 + }, + { + "epoch": 0.7037312423955657, + "grad_norm": 0.33169135451316833, + "learning_rate": 6.45064349562632e-06, + "loss": 0.04162883758544922, + "step": 10411 + }, + { + "epoch": 0.7037988373665, + "grad_norm": 0.6460099220275879, + "learning_rate": 6.447932562846748e-06, + "loss": 0.1263580322265625, + "step": 10412 + }, + { + "epoch": 0.7038664323374341, + "grad_norm": 0.7599356770515442, + "learning_rate": 6.445222043869378e-06, + "loss": 0.17657470703125, + "step": 10413 + }, + { + "epoch": 0.7039340273083683, + "grad_norm": 0.8761036992073059, + "learning_rate": 6.4425119388253655e-06, + "loss": 0.18780517578125, + "step": 10414 + }, + { + "epoch": 0.7040016222793024, + "grad_norm": 0.27222567796707153, + "learning_rate": 6.439802247845845e-06, + "loss": 0.047115325927734375, + "step": 10415 + }, + { + "epoch": 0.7040692172502366, + "grad_norm": 0.6200736165046692, + "learning_rate": 6.437092971061921e-06, + "loss": 0.09527587890625, + "step": 10416 + }, + { + "epoch": 0.7041368122211707, + "grad_norm": 1.0587899684906006, + "learning_rate": 6.434384108604702e-06, + "loss": 0.13617706298828125, + "step": 10417 + }, + { + "epoch": 0.7042044071921049, + "grad_norm": 0.5900500416755676, + "learning_rate": 6.43167566060524e-06, + "loss": 0.09146881103515625, + "step": 10418 + }, + { + "epoch": 0.7042720021630391, + "grad_norm": 0.2651064097881317, + "learning_rate": 6.428967627194598e-06, + "loss": 0.04491996765136719, + "step": 10419 + }, + { + "epoch": 0.7043395971339732, + "grad_norm": 0.5554257035255432, + "learning_rate": 6.426260008503806e-06, + "loss": 0.0854339599609375, + "step": 10420 + }, + { + "epoch": 0.7044071921049074, + "grad_norm": 0.49358540773391724, + "learning_rate": 6.423552804663875e-06, + "loss": 0.08692073822021484, + "step": 10421 + }, + { + "epoch": 0.7044747870758415, + "grad_norm": 0.6574657559394836, + "learning_rate": 6.420846015805795e-06, + "loss": 0.1206817626953125, + "step": 10422 + }, + { + "epoch": 0.7045423820467758, + "grad_norm": 0.21029609441757202, + "learning_rate": 6.4181396420605375e-06, + "loss": 0.037990570068359375, + "step": 10423 + }, + { + "epoch": 0.7046099770177099, + "grad_norm": 0.41482147574424744, + "learning_rate": 6.415433683559054e-06, + "loss": 0.070587158203125, + "step": 10424 + }, + { + "epoch": 0.704677571988644, + "grad_norm": 0.35221049189567566, + "learning_rate": 6.412728140432278e-06, + "loss": 0.053310394287109375, + "step": 10425 + }, + { + "epoch": 0.7047451669595782, + "grad_norm": 1.0278764963150024, + "learning_rate": 6.410023012811115e-06, + "loss": 0.16412353515625, + "step": 10426 + }, + { + "epoch": 0.7048127619305123, + "grad_norm": 1.1338553428649902, + "learning_rate": 6.407318300826457e-06, + "loss": 0.1472320556640625, + "step": 10427 + }, + { + "epoch": 0.7048803569014466, + "grad_norm": 1.0709741115570068, + "learning_rate": 6.404614004609185e-06, + "loss": 0.14156150817871094, + "step": 10428 + }, + { + "epoch": 0.7049479518723807, + "grad_norm": 0.27759402990341187, + "learning_rate": 6.401910124290132e-06, + "loss": 0.042724609375, + "step": 10429 + }, + { + "epoch": 0.7050155468433149, + "grad_norm": 0.9374758005142212, + "learning_rate": 6.39920666000015e-06, + "loss": 0.14083099365234375, + "step": 10430 + }, + { + "epoch": 0.705083141814249, + "grad_norm": 0.2900432050228119, + "learning_rate": 6.396503611870025e-06, + "loss": 0.056484222412109375, + "step": 10431 + }, + { + "epoch": 0.7051507367851831, + "grad_norm": 0.993465781211853, + "learning_rate": 6.3938009800305654e-06, + "loss": 0.1698760986328125, + "step": 10432 + }, + { + "epoch": 0.7052183317561174, + "grad_norm": 0.2683323621749878, + "learning_rate": 6.391098764612536e-06, + "loss": 0.050693511962890625, + "step": 10433 + }, + { + "epoch": 0.7052859267270515, + "grad_norm": 0.28711599111557007, + "learning_rate": 6.388396965746687e-06, + "loss": 0.053218841552734375, + "step": 10434 + }, + { + "epoch": 0.7053535216979857, + "grad_norm": 0.8330371975898743, + "learning_rate": 6.385695583563747e-06, + "loss": 0.11675262451171875, + "step": 10435 + }, + { + "epoch": 0.7054211166689198, + "grad_norm": 0.7682915925979614, + "learning_rate": 6.382994618194427e-06, + "loss": 0.1446990966796875, + "step": 10436 + }, + { + "epoch": 0.705488711639854, + "grad_norm": 0.7401764988899231, + "learning_rate": 6.380294069769416e-06, + "loss": 0.1270751953125, + "step": 10437 + }, + { + "epoch": 0.7055563066107882, + "grad_norm": 1.023696780204773, + "learning_rate": 6.3775939384193825e-06, + "loss": 0.11231613159179688, + "step": 10438 + }, + { + "epoch": 0.7056239015817223, + "grad_norm": 0.7877034544944763, + "learning_rate": 6.374894224274977e-06, + "loss": 0.18963623046875, + "step": 10439 + }, + { + "epoch": 0.7056914965526565, + "grad_norm": 2.2984538078308105, + "learning_rate": 6.372194927466822e-06, + "loss": 0.28564453125, + "step": 10440 + }, + { + "epoch": 0.7057590915235906, + "grad_norm": 0.30450257658958435, + "learning_rate": 6.369496048125544e-06, + "loss": 0.05743408203125, + "step": 10441 + }, + { + "epoch": 0.7058266864945248, + "grad_norm": 0.24319881200790405, + "learning_rate": 6.366797586381709e-06, + "loss": 0.04108428955078125, + "step": 10442 + }, + { + "epoch": 0.705894281465459, + "grad_norm": 0.809072732925415, + "learning_rate": 6.3640995423659055e-06, + "loss": 0.1152191162109375, + "step": 10443 + }, + { + "epoch": 0.7059618764363932, + "grad_norm": 0.701714813709259, + "learning_rate": 6.3614019162086636e-06, + "loss": 0.170623779296875, + "step": 10444 + }, + { + "epoch": 0.7060294714073273, + "grad_norm": 0.5316495299339294, + "learning_rate": 6.358704708040523e-06, + "loss": 0.10048675537109375, + "step": 10445 + }, + { + "epoch": 0.7060970663782614, + "grad_norm": 0.4266495406627655, + "learning_rate": 6.356007917991989e-06, + "loss": 0.094818115234375, + "step": 10446 + }, + { + "epoch": 0.7061646613491956, + "grad_norm": 0.9305607676506042, + "learning_rate": 6.353311546193548e-06, + "loss": 0.19708251953125, + "step": 10447 + }, + { + "epoch": 0.7062322563201298, + "grad_norm": 0.2346009612083435, + "learning_rate": 6.350615592775668e-06, + "loss": 0.04592132568359375, + "step": 10448 + }, + { + "epoch": 0.706299851291064, + "grad_norm": 0.30711618065834045, + "learning_rate": 6.347920057868796e-06, + "loss": 0.04223060607910156, + "step": 10449 + }, + { + "epoch": 0.7063674462619981, + "grad_norm": 0.6696476340293884, + "learning_rate": 6.345224941603358e-06, + "loss": 0.14000701904296875, + "step": 10450 + }, + { + "epoch": 0.7064350412329322, + "grad_norm": 0.9255599975585938, + "learning_rate": 6.342530244109761e-06, + "loss": 0.174041748046875, + "step": 10451 + }, + { + "epoch": 0.7065026362038664, + "grad_norm": 0.5677684545516968, + "learning_rate": 6.33983596551839e-06, + "loss": 0.13961029052734375, + "step": 10452 + }, + { + "epoch": 0.7065702311748006, + "grad_norm": 0.5445894002914429, + "learning_rate": 6.337142105959608e-06, + "loss": 0.0761260986328125, + "step": 10453 + }, + { + "epoch": 0.7066378261457348, + "grad_norm": 0.8206669092178345, + "learning_rate": 6.3344486655637755e-06, + "loss": 0.12462711334228516, + "step": 10454 + }, + { + "epoch": 0.7067054211166689, + "grad_norm": 0.7429386973381042, + "learning_rate": 6.331755644461194e-06, + "loss": 0.14111328125, + "step": 10455 + }, + { + "epoch": 0.7067730160876031, + "grad_norm": 0.8646976947784424, + "learning_rate": 6.329063042782189e-06, + "loss": 0.154388427734375, + "step": 10456 + }, + { + "epoch": 0.7068406110585372, + "grad_norm": 0.9768466353416443, + "learning_rate": 6.3263708606570365e-06, + "loss": 0.173919677734375, + "step": 10457 + }, + { + "epoch": 0.7069082060294714, + "grad_norm": 0.8439162373542786, + "learning_rate": 6.323679098216004e-06, + "loss": 0.11412811279296875, + "step": 10458 + }, + { + "epoch": 0.7069758010004056, + "grad_norm": 1.3300057649612427, + "learning_rate": 6.32098775558934e-06, + "loss": 0.0896148681640625, + "step": 10459 + }, + { + "epoch": 0.7070433959713397, + "grad_norm": 0.2575002610683441, + "learning_rate": 6.318296832907251e-06, + "loss": 0.042881011962890625, + "step": 10460 + }, + { + "epoch": 0.7071109909422739, + "grad_norm": 0.6773748993873596, + "learning_rate": 6.315606330299957e-06, + "loss": 0.1638031005859375, + "step": 10461 + }, + { + "epoch": 0.707178585913208, + "grad_norm": 0.4532229006290436, + "learning_rate": 6.3129162478976376e-06, + "loss": 0.07440185546875, + "step": 10462 + }, + { + "epoch": 0.7072461808841423, + "grad_norm": 0.8575584888458252, + "learning_rate": 6.3102265858304535e-06, + "loss": 0.12307357788085938, + "step": 10463 + }, + { + "epoch": 0.7073137758550764, + "grad_norm": 0.6400145888328552, + "learning_rate": 6.307537344228547e-06, + "loss": 0.1006622314453125, + "step": 10464 + }, + { + "epoch": 0.7073813708260105, + "grad_norm": 0.5325703024864197, + "learning_rate": 6.304848523222045e-06, + "loss": 0.1136474609375, + "step": 10465 + }, + { + "epoch": 0.7074489657969447, + "grad_norm": 0.6314755082130432, + "learning_rate": 6.302160122941039e-06, + "loss": 0.11244964599609375, + "step": 10466 + }, + { + "epoch": 0.7075165607678788, + "grad_norm": 0.66789710521698, + "learning_rate": 6.299472143515627e-06, + "loss": 0.145263671875, + "step": 10467 + }, + { + "epoch": 0.7075841557388131, + "grad_norm": 0.3848116099834442, + "learning_rate": 6.296784585075851e-06, + "loss": 0.051837921142578125, + "step": 10468 + }, + { + "epoch": 0.7076517507097472, + "grad_norm": 0.47322511672973633, + "learning_rate": 6.294097447751767e-06, + "loss": 0.06404876708984375, + "step": 10469 + }, + { + "epoch": 0.7077193456806814, + "grad_norm": 0.5672521591186523, + "learning_rate": 6.291410731673389e-06, + "loss": 0.14289093017578125, + "step": 10470 + }, + { + "epoch": 0.7077869406516155, + "grad_norm": 0.24665556848049164, + "learning_rate": 6.288724436970721e-06, + "loss": 0.0440521240234375, + "step": 10471 + }, + { + "epoch": 0.7078545356225496, + "grad_norm": 0.3359447717666626, + "learning_rate": 6.2860385637737365e-06, + "loss": 0.0565948486328125, + "step": 10472 + }, + { + "epoch": 0.7079221305934839, + "grad_norm": 0.29670730233192444, + "learning_rate": 6.283353112212398e-06, + "loss": 0.0537109375, + "step": 10473 + }, + { + "epoch": 0.707989725564418, + "grad_norm": 0.40077129006385803, + "learning_rate": 6.280668082416644e-06, + "loss": 0.05079078674316406, + "step": 10474 + }, + { + "epoch": 0.7080573205353522, + "grad_norm": 0.7978482842445374, + "learning_rate": 6.277983474516395e-06, + "loss": 0.1425628662109375, + "step": 10475 + }, + { + "epoch": 0.7081249155062863, + "grad_norm": 1.3967782258987427, + "learning_rate": 6.275299288641547e-06, + "loss": 0.13318634033203125, + "step": 10476 + }, + { + "epoch": 0.7081925104772205, + "grad_norm": 0.5668976902961731, + "learning_rate": 6.272615524921976e-06, + "loss": 0.10527801513671875, + "step": 10477 + }, + { + "epoch": 0.7082601054481547, + "grad_norm": 0.2578783631324768, + "learning_rate": 6.2699321834875414e-06, + "loss": 0.018129348754882812, + "step": 10478 + }, + { + "epoch": 0.7083277004190888, + "grad_norm": 0.5820475816726685, + "learning_rate": 6.267249264468075e-06, + "loss": 0.10984039306640625, + "step": 10479 + }, + { + "epoch": 0.708395295390023, + "grad_norm": 0.6888408660888672, + "learning_rate": 6.264566767993407e-06, + "loss": 0.1078338623046875, + "step": 10480 + }, + { + "epoch": 0.7084628903609571, + "grad_norm": 0.5992891192436218, + "learning_rate": 6.261884694193316e-06, + "loss": 0.1400604248046875, + "step": 10481 + }, + { + "epoch": 0.7085304853318913, + "grad_norm": 0.49546724557876587, + "learning_rate": 6.259203043197587e-06, + "loss": 0.09757614135742188, + "step": 10482 + }, + { + "epoch": 0.7085980803028255, + "grad_norm": 0.42952051758766174, + "learning_rate": 6.2565218151359795e-06, + "loss": 0.092041015625, + "step": 10483 + }, + { + "epoch": 0.7086656752737597, + "grad_norm": 0.7172062993049622, + "learning_rate": 6.253841010138213e-06, + "loss": 0.15045166015625, + "step": 10484 + }, + { + "epoch": 0.7087332702446938, + "grad_norm": 0.5178318619728088, + "learning_rate": 6.251160628334013e-06, + "loss": 0.08163070678710938, + "step": 10485 + }, + { + "epoch": 0.7088008652156279, + "grad_norm": 0.8532938361167908, + "learning_rate": 6.248480669853072e-06, + "loss": 0.12347412109375, + "step": 10486 + }, + { + "epoch": 0.7088684601865621, + "grad_norm": 0.8261191844940186, + "learning_rate": 6.245801134825061e-06, + "loss": 0.12548828125, + "step": 10487 + }, + { + "epoch": 0.7089360551574962, + "grad_norm": 1.0398311614990234, + "learning_rate": 6.243122023379633e-06, + "loss": 0.154510498046875, + "step": 10488 + }, + { + "epoch": 0.7090036501284305, + "grad_norm": 0.9171568751335144, + "learning_rate": 6.240443335646418e-06, + "loss": 0.114837646484375, + "step": 10489 + }, + { + "epoch": 0.7090712450993646, + "grad_norm": 0.2656332552433014, + "learning_rate": 6.2377650717550315e-06, + "loss": 0.0454254150390625, + "step": 10490 + }, + { + "epoch": 0.7091388400702988, + "grad_norm": 0.4815550446510315, + "learning_rate": 6.235087231835063e-06, + "loss": 0.07442474365234375, + "step": 10491 + }, + { + "epoch": 0.7092064350412329, + "grad_norm": 0.2738338112831116, + "learning_rate": 6.232409816016076e-06, + "loss": 0.037624359130859375, + "step": 10492 + }, + { + "epoch": 0.709274030012167, + "grad_norm": 0.2844396233558655, + "learning_rate": 6.229732824427638e-06, + "loss": 0.0406341552734375, + "step": 10493 + }, + { + "epoch": 0.7093416249831013, + "grad_norm": 1.1394309997558594, + "learning_rate": 6.227056257199258e-06, + "loss": 0.169342041015625, + "step": 10494 + }, + { + "epoch": 0.7094092199540354, + "grad_norm": 0.5474371314048767, + "learning_rate": 6.224380114460459e-06, + "loss": 0.0941009521484375, + "step": 10495 + }, + { + "epoch": 0.7094768149249696, + "grad_norm": 0.9479063153266907, + "learning_rate": 6.22170439634073e-06, + "loss": 0.1156005859375, + "step": 10496 + }, + { + "epoch": 0.7095444098959037, + "grad_norm": 0.6758090257644653, + "learning_rate": 6.219029102969524e-06, + "loss": 0.1214599609375, + "step": 10497 + }, + { + "epoch": 0.709612004866838, + "grad_norm": 0.7220584154129028, + "learning_rate": 6.216354234476304e-06, + "loss": 0.130279541015625, + "step": 10498 + }, + { + "epoch": 0.7096795998377721, + "grad_norm": 0.26143360137939453, + "learning_rate": 6.213679790990489e-06, + "loss": 0.0346221923828125, + "step": 10499 + }, + { + "epoch": 0.7097471948087062, + "grad_norm": 0.49329420924186707, + "learning_rate": 6.211005772641491e-06, + "loss": 0.09466552734375, + "step": 10500 + }, + { + "epoch": 0.7098147897796404, + "grad_norm": 0.3114059567451477, + "learning_rate": 6.208332179558689e-06, + "loss": 0.04619598388671875, + "step": 10501 + }, + { + "epoch": 0.7098823847505745, + "grad_norm": 0.8611077070236206, + "learning_rate": 6.2056590118714514e-06, + "loss": 0.12557220458984375, + "step": 10502 + }, + { + "epoch": 0.7099499797215088, + "grad_norm": 0.49164944887161255, + "learning_rate": 6.202986269709117e-06, + "loss": 0.07747650146484375, + "step": 10503 + }, + { + "epoch": 0.7100175746924429, + "grad_norm": 0.7560825347900391, + "learning_rate": 6.200313953201025e-06, + "loss": 0.0874032974243164, + "step": 10504 + }, + { + "epoch": 0.7100851696633771, + "grad_norm": 0.8113174438476562, + "learning_rate": 6.1976420624764595e-06, + "loss": 0.150909423828125, + "step": 10505 + }, + { + "epoch": 0.7101527646343112, + "grad_norm": 0.49856942892074585, + "learning_rate": 6.194970597664721e-06, + "loss": 0.10465240478515625, + "step": 10506 + }, + { + "epoch": 0.7102203596052453, + "grad_norm": 0.4046926498413086, + "learning_rate": 6.192299558895058e-06, + "loss": 0.105743408203125, + "step": 10507 + }, + { + "epoch": 0.7102879545761795, + "grad_norm": 0.5663446187973022, + "learning_rate": 6.189628946296713e-06, + "loss": 0.121856689453125, + "step": 10508 + }, + { + "epoch": 0.7103555495471137, + "grad_norm": 0.30788174271583557, + "learning_rate": 6.186958759998921e-06, + "loss": 0.05854034423828125, + "step": 10509 + }, + { + "epoch": 0.7104231445180479, + "grad_norm": 0.45188483595848083, + "learning_rate": 6.18428900013086e-06, + "loss": 0.0751800537109375, + "step": 10510 + }, + { + "epoch": 0.710490739488982, + "grad_norm": 0.717714250087738, + "learning_rate": 6.181619666821727e-06, + "loss": 0.12186050415039062, + "step": 10511 + }, + { + "epoch": 0.7105583344599162, + "grad_norm": 0.443404883146286, + "learning_rate": 6.178950760200676e-06, + "loss": 0.070068359375, + "step": 10512 + }, + { + "epoch": 0.7106259294308503, + "grad_norm": 0.44276222586631775, + "learning_rate": 6.176282280396845e-06, + "loss": 0.06002044677734375, + "step": 10513 + }, + { + "epoch": 0.7106935244017845, + "grad_norm": 0.9528012275695801, + "learning_rate": 6.173614227539352e-06, + "loss": 0.17303466796875, + "step": 10514 + }, + { + "epoch": 0.7107611193727187, + "grad_norm": 1.144537329673767, + "learning_rate": 6.170946601757293e-06, + "loss": 0.20819091796875, + "step": 10515 + }, + { + "epoch": 0.7108287143436528, + "grad_norm": 0.9650439620018005, + "learning_rate": 6.168279403179738e-06, + "loss": 0.146240234375, + "step": 10516 + }, + { + "epoch": 0.710896309314587, + "grad_norm": 1.099033236503601, + "learning_rate": 6.16561263193576e-06, + "loss": 0.204254150390625, + "step": 10517 + }, + { + "epoch": 0.7109639042855211, + "grad_norm": 1.2765525579452515, + "learning_rate": 6.1629462881543795e-06, + "loss": 0.162689208984375, + "step": 10518 + }, + { + "epoch": 0.7110314992564554, + "grad_norm": 0.6198372840881348, + "learning_rate": 6.16028037196461e-06, + "loss": 0.1217498779296875, + "step": 10519 + }, + { + "epoch": 0.7110990942273895, + "grad_norm": 0.2878434360027313, + "learning_rate": 6.157614883495458e-06, + "loss": 0.04613494873046875, + "step": 10520 + }, + { + "epoch": 0.7111666891983236, + "grad_norm": 0.7280130386352539, + "learning_rate": 6.154949822875878e-06, + "loss": 0.13149261474609375, + "step": 10521 + }, + { + "epoch": 0.7112342841692578, + "grad_norm": 0.26599863171577454, + "learning_rate": 6.1522851902348435e-06, + "loss": 0.04239654541015625, + "step": 10522 + }, + { + "epoch": 0.7113018791401919, + "grad_norm": 1.3676598072052002, + "learning_rate": 6.149620985701265e-06, + "loss": 0.18389892578125, + "step": 10523 + }, + { + "epoch": 0.7113694741111262, + "grad_norm": 1.0095117092132568, + "learning_rate": 6.146957209404068e-06, + "loss": 0.154815673828125, + "step": 10524 + }, + { + "epoch": 0.7114370690820603, + "grad_norm": 0.4549994468688965, + "learning_rate": 6.144293861472137e-06, + "loss": 0.073944091796875, + "step": 10525 + }, + { + "epoch": 0.7115046640529945, + "grad_norm": 0.8721324801445007, + "learning_rate": 6.141630942034342e-06, + "loss": 0.1198883056640625, + "step": 10526 + }, + { + "epoch": 0.7115722590239286, + "grad_norm": 0.6264830827713013, + "learning_rate": 6.138968451219532e-06, + "loss": 0.11951446533203125, + "step": 10527 + }, + { + "epoch": 0.7116398539948627, + "grad_norm": 0.5395201444625854, + "learning_rate": 6.1363063891565344e-06, + "loss": 0.07250213623046875, + "step": 10528 + }, + { + "epoch": 0.711707448965797, + "grad_norm": 0.22069714963436127, + "learning_rate": 6.133644755974151e-06, + "loss": 0.024138927459716797, + "step": 10529 + }, + { + "epoch": 0.7117750439367311, + "grad_norm": 0.8233470320701599, + "learning_rate": 6.130983551801185e-06, + "loss": 0.17901611328125, + "step": 10530 + }, + { + "epoch": 0.7118426389076653, + "grad_norm": 0.6964094042778015, + "learning_rate": 6.128322776766384e-06, + "loss": 0.111419677734375, + "step": 10531 + }, + { + "epoch": 0.7119102338785994, + "grad_norm": 0.23864130675792694, + "learning_rate": 6.125662430998495e-06, + "loss": 0.044830322265625, + "step": 10532 + }, + { + "epoch": 0.7119778288495336, + "grad_norm": 0.6573531627655029, + "learning_rate": 6.123002514626257e-06, + "loss": 0.10837936401367188, + "step": 10533 + }, + { + "epoch": 0.7120454238204678, + "grad_norm": 0.99046790599823, + "learning_rate": 6.120343027778353e-06, + "loss": 0.1626739501953125, + "step": 10534 + }, + { + "epoch": 0.7121130187914019, + "grad_norm": 0.6505600214004517, + "learning_rate": 6.1176839705834845e-06, + "loss": 0.092559814453125, + "step": 10535 + }, + { + "epoch": 0.7121806137623361, + "grad_norm": 0.7780642509460449, + "learning_rate": 6.1150253431702935e-06, + "loss": 0.162750244140625, + "step": 10536 + }, + { + "epoch": 0.7122482087332702, + "grad_norm": 0.6693102717399597, + "learning_rate": 6.112367145667438e-06, + "loss": 0.15167236328125, + "step": 10537 + }, + { + "epoch": 0.7123158037042044, + "grad_norm": 0.2233293205499649, + "learning_rate": 6.10970937820353e-06, + "loss": 0.0239410400390625, + "step": 10538 + }, + { + "epoch": 0.7123833986751386, + "grad_norm": 0.7029841542243958, + "learning_rate": 6.107052040907172e-06, + "loss": 0.120025634765625, + "step": 10539 + }, + { + "epoch": 0.7124509936460728, + "grad_norm": 0.4454919099807739, + "learning_rate": 6.10439513390694e-06, + "loss": 0.0723419189453125, + "step": 10540 + }, + { + "epoch": 0.7125185886170069, + "grad_norm": 0.8572376370429993, + "learning_rate": 6.101738657331392e-06, + "loss": 0.1275634765625, + "step": 10541 + }, + { + "epoch": 0.712586183587941, + "grad_norm": 1.4411289691925049, + "learning_rate": 6.0990826113090665e-06, + "loss": 0.1930084228515625, + "step": 10542 + }, + { + "epoch": 0.7126537785588752, + "grad_norm": 0.5979475975036621, + "learning_rate": 6.096426995968478e-06, + "loss": 0.1014862060546875, + "step": 10543 + }, + { + "epoch": 0.7127213735298094, + "grad_norm": 1.1606477499008179, + "learning_rate": 6.093771811438122e-06, + "loss": 0.198944091796875, + "step": 10544 + }, + { + "epoch": 0.7127889685007436, + "grad_norm": 0.44359248876571655, + "learning_rate": 6.091117057846468e-06, + "loss": 0.07787322998046875, + "step": 10545 + }, + { + "epoch": 0.7128565634716777, + "grad_norm": 0.3350120186805725, + "learning_rate": 6.0884627353219854e-06, + "loss": 0.050556182861328125, + "step": 10546 + }, + { + "epoch": 0.7129241584426119, + "grad_norm": 0.5502070188522339, + "learning_rate": 6.085808843993086e-06, + "loss": 0.0886077880859375, + "step": 10547 + }, + { + "epoch": 0.712991753413546, + "grad_norm": 1.206626057624817, + "learning_rate": 6.0831553839881945e-06, + "loss": 0.15911865234375, + "step": 10548 + }, + { + "epoch": 0.7130593483844802, + "grad_norm": 0.5949788689613342, + "learning_rate": 6.080502355435702e-06, + "loss": 0.0910186767578125, + "step": 10549 + }, + { + "epoch": 0.7131269433554144, + "grad_norm": 0.3696732223033905, + "learning_rate": 6.0778497584639715e-06, + "loss": 0.05768775939941406, + "step": 10550 + }, + { + "epoch": 0.7131945383263485, + "grad_norm": 0.5566837787628174, + "learning_rate": 6.075197593201358e-06, + "loss": 0.10250091552734375, + "step": 10551 + }, + { + "epoch": 0.7132621332972827, + "grad_norm": 0.6273710131645203, + "learning_rate": 6.0725458597761874e-06, + "loss": 0.10353469848632812, + "step": 10552 + }, + { + "epoch": 0.7133297282682168, + "grad_norm": 1.5282421112060547, + "learning_rate": 6.069894558316768e-06, + "loss": 0.1809234619140625, + "step": 10553 + }, + { + "epoch": 0.7133973232391511, + "grad_norm": 0.3077695965766907, + "learning_rate": 6.067243688951384e-06, + "loss": 0.06290435791015625, + "step": 10554 + }, + { + "epoch": 0.7134649182100852, + "grad_norm": 0.3133178651332855, + "learning_rate": 6.0645932518083054e-06, + "loss": 0.036968231201171875, + "step": 10555 + }, + { + "epoch": 0.7135325131810193, + "grad_norm": 0.4221886098384857, + "learning_rate": 6.061943247015771e-06, + "loss": 0.0843353271484375, + "step": 10556 + }, + { + "epoch": 0.7136001081519535, + "grad_norm": 0.8732019662857056, + "learning_rate": 6.0592936747020085e-06, + "loss": 0.203765869140625, + "step": 10557 + }, + { + "epoch": 0.7136677031228876, + "grad_norm": 0.3185567855834961, + "learning_rate": 6.056644534995215e-06, + "loss": 0.03692626953125, + "step": 10558 + }, + { + "epoch": 0.7137352980938219, + "grad_norm": 0.3927328586578369, + "learning_rate": 6.053995828023587e-06, + "loss": 0.03827667236328125, + "step": 10559 + }, + { + "epoch": 0.713802893064756, + "grad_norm": 0.5883041024208069, + "learning_rate": 6.051347553915266e-06, + "loss": 0.124298095703125, + "step": 10560 + }, + { + "epoch": 0.7138704880356902, + "grad_norm": 0.24067839980125427, + "learning_rate": 6.048699712798406e-06, + "loss": 0.04497528076171875, + "step": 10561 + }, + { + "epoch": 0.7139380830066243, + "grad_norm": 0.4711301028728485, + "learning_rate": 6.046052304801121e-06, + "loss": 0.052761077880859375, + "step": 10562 + }, + { + "epoch": 0.7140056779775584, + "grad_norm": 0.508794903755188, + "learning_rate": 6.043405330051508e-06, + "loss": 0.07663726806640625, + "step": 10563 + }, + { + "epoch": 0.7140732729484927, + "grad_norm": 0.7547810077667236, + "learning_rate": 6.040758788677648e-06, + "loss": 0.08316802978515625, + "step": 10564 + }, + { + "epoch": 0.7141408679194268, + "grad_norm": 0.4510578513145447, + "learning_rate": 6.038112680807594e-06, + "loss": 0.0853424072265625, + "step": 10565 + }, + { + "epoch": 0.714208462890361, + "grad_norm": 1.630419373512268, + "learning_rate": 6.035467006569382e-06, + "loss": 0.1885986328125, + "step": 10566 + }, + { + "epoch": 0.7142760578612951, + "grad_norm": 1.0580222606658936, + "learning_rate": 6.032821766091027e-06, + "loss": 0.16522216796875, + "step": 10567 + }, + { + "epoch": 0.7143436528322293, + "grad_norm": 0.9351329207420349, + "learning_rate": 6.0301769595005205e-06, + "loss": 0.188232421875, + "step": 10568 + }, + { + "epoch": 0.7144112478031635, + "grad_norm": 0.26331761479377747, + "learning_rate": 6.027532586925834e-06, + "loss": 0.05539703369140625, + "step": 10569 + }, + { + "epoch": 0.7144788427740976, + "grad_norm": 0.3668034076690674, + "learning_rate": 6.024888648494921e-06, + "loss": 0.055347442626953125, + "step": 10570 + }, + { + "epoch": 0.7145464377450318, + "grad_norm": 0.7484716773033142, + "learning_rate": 6.022245144335705e-06, + "loss": 0.10619735717773438, + "step": 10571 + }, + { + "epoch": 0.7146140327159659, + "grad_norm": 0.5826205611228943, + "learning_rate": 6.019602074576113e-06, + "loss": 0.0899505615234375, + "step": 10572 + }, + { + "epoch": 0.7146816276869001, + "grad_norm": 0.46670177578926086, + "learning_rate": 6.01695943934401e-06, + "loss": 0.07464981079101562, + "step": 10573 + }, + { + "epoch": 0.7147492226578342, + "grad_norm": 0.45304858684539795, + "learning_rate": 6.01431723876728e-06, + "loss": 0.06864166259765625, + "step": 10574 + }, + { + "epoch": 0.7148168176287685, + "grad_norm": 1.0510817766189575, + "learning_rate": 6.011675472973762e-06, + "loss": 0.169708251953125, + "step": 10575 + }, + { + "epoch": 0.7148844125997026, + "grad_norm": 0.7400321960449219, + "learning_rate": 6.009034142091284e-06, + "loss": 0.1391754150390625, + "step": 10576 + }, + { + "epoch": 0.7149520075706367, + "grad_norm": 0.6642686128616333, + "learning_rate": 6.006393246247647e-06, + "loss": 0.1229705810546875, + "step": 10577 + }, + { + "epoch": 0.7150196025415709, + "grad_norm": 0.5738087892532349, + "learning_rate": 6.003752785570636e-06, + "loss": 0.0955657958984375, + "step": 10578 + }, + { + "epoch": 0.715087197512505, + "grad_norm": 0.27612248063087463, + "learning_rate": 6.001112760188012e-06, + "loss": 0.042621612548828125, + "step": 10579 + }, + { + "epoch": 0.7151547924834393, + "grad_norm": 0.8811525106430054, + "learning_rate": 5.9984731702275166e-06, + "loss": 0.11688613891601562, + "step": 10580 + }, + { + "epoch": 0.7152223874543734, + "grad_norm": 0.6152281761169434, + "learning_rate": 5.99583401581687e-06, + "loss": 0.10321044921875, + "step": 10581 + }, + { + "epoch": 0.7152899824253075, + "grad_norm": 0.7231747508049011, + "learning_rate": 5.993195297083768e-06, + "loss": 0.13376617431640625, + "step": 10582 + }, + { + "epoch": 0.7153575773962417, + "grad_norm": 0.5862913727760315, + "learning_rate": 5.990557014155891e-06, + "loss": 0.12091064453125, + "step": 10583 + }, + { + "epoch": 0.7154251723671758, + "grad_norm": 0.9680089950561523, + "learning_rate": 5.987919167160889e-06, + "loss": 0.1270599365234375, + "step": 10584 + }, + { + "epoch": 0.7154927673381101, + "grad_norm": 0.36800017952919006, + "learning_rate": 5.985281756226416e-06, + "loss": 0.0951080322265625, + "step": 10585 + }, + { + "epoch": 0.7155603623090442, + "grad_norm": 0.768827497959137, + "learning_rate": 5.982644781480061e-06, + "loss": 0.0941009521484375, + "step": 10586 + }, + { + "epoch": 0.7156279572799784, + "grad_norm": 0.34150466322898865, + "learning_rate": 5.980008243049436e-06, + "loss": 0.046916961669921875, + "step": 10587 + }, + { + "epoch": 0.7156955522509125, + "grad_norm": 0.5022083520889282, + "learning_rate": 5.977372141062111e-06, + "loss": 0.088348388671875, + "step": 10588 + }, + { + "epoch": 0.7157631472218466, + "grad_norm": 0.644321620464325, + "learning_rate": 5.974736475645624e-06, + "loss": 0.09843063354492188, + "step": 10589 + }, + { + "epoch": 0.7158307421927809, + "grad_norm": 0.41549360752105713, + "learning_rate": 5.972101246927517e-06, + "loss": 0.06567764282226562, + "step": 10590 + }, + { + "epoch": 0.715898337163715, + "grad_norm": 0.333748996257782, + "learning_rate": 5.969466455035298e-06, + "loss": 0.06959152221679688, + "step": 10591 + }, + { + "epoch": 0.7159659321346492, + "grad_norm": 0.381157249212265, + "learning_rate": 5.96683210009645e-06, + "loss": 0.07903289794921875, + "step": 10592 + }, + { + "epoch": 0.7160335271055833, + "grad_norm": 0.5359281301498413, + "learning_rate": 5.964198182238443e-06, + "loss": 0.1091766357421875, + "step": 10593 + }, + { + "epoch": 0.7161011220765175, + "grad_norm": 0.801956057548523, + "learning_rate": 5.96156470158872e-06, + "loss": 0.1236419677734375, + "step": 10594 + }, + { + "epoch": 0.7161687170474517, + "grad_norm": 1.0951365232467651, + "learning_rate": 5.958931658274701e-06, + "loss": 0.14324951171875, + "step": 10595 + }, + { + "epoch": 0.7162363120183858, + "grad_norm": 0.3232915699481964, + "learning_rate": 5.956299052423807e-06, + "loss": 0.03986358642578125, + "step": 10596 + }, + { + "epoch": 0.71630390698932, + "grad_norm": 0.7263240814208984, + "learning_rate": 5.953666884163394e-06, + "loss": 0.13680267333984375, + "step": 10597 + }, + { + "epoch": 0.7163715019602541, + "grad_norm": 1.0649235248565674, + "learning_rate": 5.9510351536208466e-06, + "loss": 0.15826416015625, + "step": 10598 + }, + { + "epoch": 0.7164390969311883, + "grad_norm": 0.6744592785835266, + "learning_rate": 5.948403860923483e-06, + "loss": 0.126800537109375, + "step": 10599 + }, + { + "epoch": 0.7165066919021225, + "grad_norm": 0.3778160810470581, + "learning_rate": 5.945773006198637e-06, + "loss": 0.05228996276855469, + "step": 10600 + }, + { + "epoch": 0.7165742868730567, + "grad_norm": 0.33325469493865967, + "learning_rate": 5.943142589573607e-06, + "loss": 0.06354522705078125, + "step": 10601 + }, + { + "epoch": 0.7166418818439908, + "grad_norm": 0.7534608840942383, + "learning_rate": 5.940512611175651e-06, + "loss": 0.140350341796875, + "step": 10602 + }, + { + "epoch": 0.7167094768149249, + "grad_norm": 0.36787310242652893, + "learning_rate": 5.937883071132041e-06, + "loss": 0.0811309814453125, + "step": 10603 + }, + { + "epoch": 0.7167770717858591, + "grad_norm": 1.1151565313339233, + "learning_rate": 5.935253969570004e-06, + "loss": 0.152313232421875, + "step": 10604 + }, + { + "epoch": 0.7168446667567933, + "grad_norm": 0.33080652356147766, + "learning_rate": 5.932625306616754e-06, + "loss": 0.0540008544921875, + "step": 10605 + }, + { + "epoch": 0.7169122617277275, + "grad_norm": 0.5312182903289795, + "learning_rate": 5.92999708239948e-06, + "loss": 0.067108154296875, + "step": 10606 + }, + { + "epoch": 0.7169798566986616, + "grad_norm": 0.3693558871746063, + "learning_rate": 5.9273692970453545e-06, + "loss": 0.0571441650390625, + "step": 10607 + }, + { + "epoch": 0.7170474516695958, + "grad_norm": 0.8651136159896851, + "learning_rate": 5.924741950681517e-06, + "loss": 0.148468017578125, + "step": 10608 + }, + { + "epoch": 0.7171150466405299, + "grad_norm": 0.4313332736492157, + "learning_rate": 5.922115043435114e-06, + "loss": 0.07242584228515625, + "step": 10609 + }, + { + "epoch": 0.7171826416114641, + "grad_norm": 0.6070297360420227, + "learning_rate": 5.91948857543323e-06, + "loss": 0.0977783203125, + "step": 10610 + }, + { + "epoch": 0.7172502365823983, + "grad_norm": 0.6520078182220459, + "learning_rate": 5.916862546802964e-06, + "loss": 0.07597732543945312, + "step": 10611 + }, + { + "epoch": 0.7173178315533324, + "grad_norm": 0.7479240894317627, + "learning_rate": 5.914236957671381e-06, + "loss": 0.11511993408203125, + "step": 10612 + }, + { + "epoch": 0.7173854265242666, + "grad_norm": 0.22714650630950928, + "learning_rate": 5.911611808165508e-06, + "loss": 0.03432464599609375, + "step": 10613 + }, + { + "epoch": 0.7174530214952007, + "grad_norm": 1.9671595096588135, + "learning_rate": 5.908987098412386e-06, + "loss": 0.159820556640625, + "step": 10614 + }, + { + "epoch": 0.717520616466135, + "grad_norm": 2.2739782333374023, + "learning_rate": 5.906362828538993e-06, + "loss": 0.225494384765625, + "step": 10615 + }, + { + "epoch": 0.7175882114370691, + "grad_norm": 0.4025076925754547, + "learning_rate": 5.903738998672326e-06, + "loss": 0.09735107421875, + "step": 10616 + }, + { + "epoch": 0.7176558064080032, + "grad_norm": 1.1444741487503052, + "learning_rate": 5.901115608939333e-06, + "loss": 0.19110107421875, + "step": 10617 + }, + { + "epoch": 0.7177234013789374, + "grad_norm": 0.3352445363998413, + "learning_rate": 5.898492659466953e-06, + "loss": 0.05295753479003906, + "step": 10618 + }, + { + "epoch": 0.7177909963498715, + "grad_norm": 0.474826842546463, + "learning_rate": 5.895870150382101e-06, + "loss": 0.07848358154296875, + "step": 10619 + }, + { + "epoch": 0.7178585913208058, + "grad_norm": 0.8926011919975281, + "learning_rate": 5.893248081811669e-06, + "loss": 0.15106201171875, + "step": 10620 + }, + { + "epoch": 0.7179261862917399, + "grad_norm": 0.8599990606307983, + "learning_rate": 5.890626453882523e-06, + "loss": 0.1451263427734375, + "step": 10621 + }, + { + "epoch": 0.7179937812626741, + "grad_norm": 0.3677208423614502, + "learning_rate": 5.88800526672153e-06, + "loss": 0.0647735595703125, + "step": 10622 + }, + { + "epoch": 0.7180613762336082, + "grad_norm": 0.7129409313201904, + "learning_rate": 5.885384520455505e-06, + "loss": 0.10720062255859375, + "step": 10623 + }, + { + "epoch": 0.7181289712045423, + "grad_norm": 0.7085460424423218, + "learning_rate": 5.882764215211255e-06, + "loss": 0.1197052001953125, + "step": 10624 + }, + { + "epoch": 0.7181965661754766, + "grad_norm": 0.7227972149848938, + "learning_rate": 5.880144351115581e-06, + "loss": 0.12770843505859375, + "step": 10625 + }, + { + "epoch": 0.7182641611464107, + "grad_norm": 0.6678670048713684, + "learning_rate": 5.877524928295229e-06, + "loss": 0.10167694091796875, + "step": 10626 + }, + { + "epoch": 0.7183317561173449, + "grad_norm": 0.6185396313667297, + "learning_rate": 5.8749059468769625e-06, + "loss": 0.09645843505859375, + "step": 10627 + }, + { + "epoch": 0.718399351088279, + "grad_norm": 0.9703912138938904, + "learning_rate": 5.872287406987485e-06, + "loss": 0.18277740478515625, + "step": 10628 + }, + { + "epoch": 0.7184669460592132, + "grad_norm": 0.5184305310249329, + "learning_rate": 5.869669308753514e-06, + "loss": 0.0982818603515625, + "step": 10629 + }, + { + "epoch": 0.7185345410301474, + "grad_norm": 0.8677042126655579, + "learning_rate": 5.867051652301721e-06, + "loss": 0.115386962890625, + "step": 10630 + }, + { + "epoch": 0.7186021360010815, + "grad_norm": 0.6485568284988403, + "learning_rate": 5.8644344377587675e-06, + "loss": 0.1109619140625, + "step": 10631 + }, + { + "epoch": 0.7186697309720157, + "grad_norm": 0.8493589758872986, + "learning_rate": 5.861817665251288e-06, + "loss": 0.1192779541015625, + "step": 10632 + }, + { + "epoch": 0.7187373259429498, + "grad_norm": 0.23846031725406647, + "learning_rate": 5.8592013349059014e-06, + "loss": 0.037616729736328125, + "step": 10633 + }, + { + "epoch": 0.718804920913884, + "grad_norm": 0.21551716327667236, + "learning_rate": 5.856585446849193e-06, + "loss": 0.039859771728515625, + "step": 10634 + }, + { + "epoch": 0.7188725158848182, + "grad_norm": 0.6154969930648804, + "learning_rate": 5.853970001207754e-06, + "loss": 0.09165573120117188, + "step": 10635 + }, + { + "epoch": 0.7189401108557524, + "grad_norm": 0.6087042093276978, + "learning_rate": 5.8513549981081195e-06, + "loss": 0.09543228149414062, + "step": 10636 + }, + { + "epoch": 0.7190077058266865, + "grad_norm": 0.4436051845550537, + "learning_rate": 5.848740437676822e-06, + "loss": 0.09819793701171875, + "step": 10637 + }, + { + "epoch": 0.7190753007976206, + "grad_norm": 0.5544968247413635, + "learning_rate": 5.846126320040382e-06, + "loss": 0.1358795166015625, + "step": 10638 + }, + { + "epoch": 0.7191428957685548, + "grad_norm": 0.27436062693595886, + "learning_rate": 5.843512645325267e-06, + "loss": 0.0550384521484375, + "step": 10639 + }, + { + "epoch": 0.719210490739489, + "grad_norm": 0.5247060060501099, + "learning_rate": 5.840899413657959e-06, + "loss": 0.096923828125, + "step": 10640 + }, + { + "epoch": 0.7192780857104232, + "grad_norm": 0.3883949816226959, + "learning_rate": 5.8382866251648976e-06, + "loss": 0.1057891845703125, + "step": 10641 + }, + { + "epoch": 0.7193456806813573, + "grad_norm": 1.312292218208313, + "learning_rate": 5.835674279972505e-06, + "loss": 0.159393310546875, + "step": 10642 + }, + { + "epoch": 0.7194132756522915, + "grad_norm": 0.9011721611022949, + "learning_rate": 5.833062378207185e-06, + "loss": 0.159210205078125, + "step": 10643 + }, + { + "epoch": 0.7194808706232256, + "grad_norm": 0.8146486878395081, + "learning_rate": 5.8304509199953135e-06, + "loss": 0.1586761474609375, + "step": 10644 + }, + { + "epoch": 0.7195484655941597, + "grad_norm": 0.4651776850223541, + "learning_rate": 5.827839905463252e-06, + "loss": 0.08370208740234375, + "step": 10645 + }, + { + "epoch": 0.719616060565094, + "grad_norm": 0.3998546004295349, + "learning_rate": 5.825229334737337e-06, + "loss": 0.06461334228515625, + "step": 10646 + }, + { + "epoch": 0.7196836555360281, + "grad_norm": 0.44776928424835205, + "learning_rate": 5.822619207943884e-06, + "loss": 0.105194091796875, + "step": 10647 + }, + { + "epoch": 0.7197512505069623, + "grad_norm": 0.18201342225074768, + "learning_rate": 5.820009525209188e-06, + "loss": 0.03105926513671875, + "step": 10648 + }, + { + "epoch": 0.7198188454778964, + "grad_norm": 0.35998713970184326, + "learning_rate": 5.817400286659519e-06, + "loss": 0.04644966125488281, + "step": 10649 + }, + { + "epoch": 0.7198864404488307, + "grad_norm": 0.4872083067893982, + "learning_rate": 5.814791492421127e-06, + "loss": 0.0893402099609375, + "step": 10650 + }, + { + "epoch": 0.7199540354197648, + "grad_norm": 0.8548750877380371, + "learning_rate": 5.8121831426202535e-06, + "loss": 0.12665557861328125, + "step": 10651 + }, + { + "epoch": 0.7200216303906989, + "grad_norm": 0.5684908032417297, + "learning_rate": 5.8095752373830885e-06, + "loss": 0.1198272705078125, + "step": 10652 + }, + { + "epoch": 0.7200892253616331, + "grad_norm": 1.0150275230407715, + "learning_rate": 5.806967776835833e-06, + "loss": 0.2200164794921875, + "step": 10653 + }, + { + "epoch": 0.7201568203325672, + "grad_norm": 0.606200098991394, + "learning_rate": 5.8043607611046485e-06, + "loss": 0.1342010498046875, + "step": 10654 + }, + { + "epoch": 0.7202244153035015, + "grad_norm": 1.2451720237731934, + "learning_rate": 5.801754190315677e-06, + "loss": 0.19366455078125, + "step": 10655 + }, + { + "epoch": 0.7202920102744356, + "grad_norm": 0.18259312212467194, + "learning_rate": 5.799148064595039e-06, + "loss": 0.03275299072265625, + "step": 10656 + }, + { + "epoch": 0.7203596052453698, + "grad_norm": 0.21739616990089417, + "learning_rate": 5.796542384068839e-06, + "loss": 0.03577423095703125, + "step": 10657 + }, + { + "epoch": 0.7204272002163039, + "grad_norm": 0.5389594435691833, + "learning_rate": 5.793937148863155e-06, + "loss": 0.075714111328125, + "step": 10658 + }, + { + "epoch": 0.720494795187238, + "grad_norm": 0.5155824422836304, + "learning_rate": 5.791332359104042e-06, + "loss": 0.109375, + "step": 10659 + }, + { + "epoch": 0.7205623901581723, + "grad_norm": 0.5689169764518738, + "learning_rate": 5.788728014917538e-06, + "loss": 0.0786895751953125, + "step": 10660 + }, + { + "epoch": 0.7206299851291064, + "grad_norm": 0.8932096362113953, + "learning_rate": 5.786124116429656e-06, + "loss": 0.1589813232421875, + "step": 10661 + }, + { + "epoch": 0.7206975801000406, + "grad_norm": 0.8940838575363159, + "learning_rate": 5.783520663766391e-06, + "loss": 0.1305084228515625, + "step": 10662 + }, + { + "epoch": 0.7207651750709747, + "grad_norm": 0.6266250014305115, + "learning_rate": 5.780917657053708e-06, + "loss": 0.10105133056640625, + "step": 10663 + }, + { + "epoch": 0.7208327700419089, + "grad_norm": 0.3855235278606415, + "learning_rate": 5.778315096417571e-06, + "loss": 0.0807952880859375, + "step": 10664 + }, + { + "epoch": 0.720900365012843, + "grad_norm": 0.6335029602050781, + "learning_rate": 5.77571298198389e-06, + "loss": 0.091278076171875, + "step": 10665 + }, + { + "epoch": 0.7209679599837772, + "grad_norm": 1.4548124074935913, + "learning_rate": 5.773111313878584e-06, + "loss": 0.1454296112060547, + "step": 10666 + }, + { + "epoch": 0.7210355549547114, + "grad_norm": 1.075618028640747, + "learning_rate": 5.770510092227533e-06, + "loss": 0.146728515625, + "step": 10667 + }, + { + "epoch": 0.7211031499256455, + "grad_norm": 1.4577292203903198, + "learning_rate": 5.767909317156601e-06, + "loss": 0.2564697265625, + "step": 10668 + }, + { + "epoch": 0.7211707448965797, + "grad_norm": 0.8155187964439392, + "learning_rate": 5.765308988791632e-06, + "loss": 0.12310791015625, + "step": 10669 + }, + { + "epoch": 0.7212383398675138, + "grad_norm": 1.0933942794799805, + "learning_rate": 5.762709107258443e-06, + "loss": 0.11362457275390625, + "step": 10670 + }, + { + "epoch": 0.7213059348384481, + "grad_norm": 0.45317453145980835, + "learning_rate": 5.760109672682835e-06, + "loss": 0.05718231201171875, + "step": 10671 + }, + { + "epoch": 0.7213735298093822, + "grad_norm": 0.52712082862854, + "learning_rate": 5.7575106851905825e-06, + "loss": 0.0963592529296875, + "step": 10672 + }, + { + "epoch": 0.7214411247803163, + "grad_norm": 0.21547245979309082, + "learning_rate": 5.754912144907441e-06, + "loss": 0.04134368896484375, + "step": 10673 + }, + { + "epoch": 0.7215087197512505, + "grad_norm": 0.2720082402229309, + "learning_rate": 5.752314051959145e-06, + "loss": 0.043731689453125, + "step": 10674 + }, + { + "epoch": 0.7215763147221846, + "grad_norm": 0.8452115058898926, + "learning_rate": 5.749716406471406e-06, + "loss": 0.15142822265625, + "step": 10675 + }, + { + "epoch": 0.7216439096931189, + "grad_norm": 0.7692596912384033, + "learning_rate": 5.747119208569907e-06, + "loss": 0.09954833984375, + "step": 10676 + }, + { + "epoch": 0.721711504664053, + "grad_norm": 0.7880877256393433, + "learning_rate": 5.744522458380337e-06, + "loss": 0.127471923828125, + "step": 10677 + }, + { + "epoch": 0.7217790996349872, + "grad_norm": 0.6332391500473022, + "learning_rate": 5.7419261560283185e-06, + "loss": 0.07500457763671875, + "step": 10678 + }, + { + "epoch": 0.7218466946059213, + "grad_norm": 0.8982301354408264, + "learning_rate": 5.739330301639492e-06, + "loss": 0.128631591796875, + "step": 10679 + }, + { + "epoch": 0.7219142895768554, + "grad_norm": 0.9031897783279419, + "learning_rate": 5.7367348953394575e-06, + "loss": 0.1474761962890625, + "step": 10680 + }, + { + "epoch": 0.7219818845477897, + "grad_norm": 0.21770718693733215, + "learning_rate": 5.734139937253796e-06, + "loss": 0.038970947265625, + "step": 10681 + }, + { + "epoch": 0.7220494795187238, + "grad_norm": 0.8036870360374451, + "learning_rate": 5.731545427508069e-06, + "loss": 0.128173828125, + "step": 10682 + }, + { + "epoch": 0.722117074489658, + "grad_norm": 0.9406044483184814, + "learning_rate": 5.728951366227814e-06, + "loss": 0.1408233642578125, + "step": 10683 + }, + { + "epoch": 0.7221846694605921, + "grad_norm": 0.2721741497516632, + "learning_rate": 5.726357753538547e-06, + "loss": 0.03717041015625, + "step": 10684 + }, + { + "epoch": 0.7222522644315263, + "grad_norm": 0.29120609164237976, + "learning_rate": 5.723764589565765e-06, + "loss": 0.032817840576171875, + "step": 10685 + }, + { + "epoch": 0.7223198594024605, + "grad_norm": 0.25033754110336304, + "learning_rate": 5.72117187443494e-06, + "loss": 0.03888702392578125, + "step": 10686 + }, + { + "epoch": 0.7223874543733946, + "grad_norm": 0.2934402823448181, + "learning_rate": 5.7185796082715184e-06, + "loss": 0.04625701904296875, + "step": 10687 + }, + { + "epoch": 0.7224550493443288, + "grad_norm": 0.27801480889320374, + "learning_rate": 5.715987791200947e-06, + "loss": 0.050018310546875, + "step": 10688 + }, + { + "epoch": 0.7225226443152629, + "grad_norm": 0.2914121448993683, + "learning_rate": 5.713396423348613e-06, + "loss": 0.031494140625, + "step": 10689 + }, + { + "epoch": 0.7225902392861971, + "grad_norm": 0.7409125566482544, + "learning_rate": 5.710805504839923e-06, + "loss": 0.1285552978515625, + "step": 10690 + }, + { + "epoch": 0.7226578342571313, + "grad_norm": 0.2669549584388733, + "learning_rate": 5.70821503580022e-06, + "loss": 0.04158782958984375, + "step": 10691 + }, + { + "epoch": 0.7227254292280655, + "grad_norm": 0.7441031336784363, + "learning_rate": 5.705625016354865e-06, + "loss": 0.1276092529296875, + "step": 10692 + }, + { + "epoch": 0.7227930241989996, + "grad_norm": 0.5979465246200562, + "learning_rate": 5.703035446629176e-06, + "loss": 0.1053314208984375, + "step": 10693 + }, + { + "epoch": 0.7228606191699337, + "grad_norm": 0.24782533943653107, + "learning_rate": 5.700446326748442e-06, + "loss": 0.053096771240234375, + "step": 10694 + }, + { + "epoch": 0.7229282141408679, + "grad_norm": 0.40180400013923645, + "learning_rate": 5.697857656837952e-06, + "loss": 0.05755615234375, + "step": 10695 + }, + { + "epoch": 0.7229958091118021, + "grad_norm": 1.441595435142517, + "learning_rate": 5.695269437022957e-06, + "loss": 0.15997314453125, + "step": 10696 + }, + { + "epoch": 0.7230634040827363, + "grad_norm": 0.6892858147621155, + "learning_rate": 5.692681667428693e-06, + "loss": 0.13995361328125, + "step": 10697 + }, + { + "epoch": 0.7231309990536704, + "grad_norm": 0.1585475504398346, + "learning_rate": 5.690094348180372e-06, + "loss": 0.019288063049316406, + "step": 10698 + }, + { + "epoch": 0.7231985940246046, + "grad_norm": 0.7551172971725464, + "learning_rate": 5.687507479403183e-06, + "loss": 0.1561431884765625, + "step": 10699 + }, + { + "epoch": 0.7232661889955387, + "grad_norm": 0.8746978044509888, + "learning_rate": 5.684921061222291e-06, + "loss": 0.18373870849609375, + "step": 10700 + }, + { + "epoch": 0.7233337839664729, + "grad_norm": 0.42978280782699585, + "learning_rate": 5.682335093762858e-06, + "loss": 0.05280303955078125, + "step": 10701 + }, + { + "epoch": 0.7234013789374071, + "grad_norm": 0.9210246801376343, + "learning_rate": 5.679749577149989e-06, + "loss": 0.21038818359375, + "step": 10702 + }, + { + "epoch": 0.7234689739083412, + "grad_norm": 0.5155279636383057, + "learning_rate": 5.6771645115088035e-06, + "loss": 0.07863998413085938, + "step": 10703 + }, + { + "epoch": 0.7235365688792754, + "grad_norm": 0.2905105948448181, + "learning_rate": 5.6745798969643756e-06, + "loss": 0.05345916748046875, + "step": 10704 + }, + { + "epoch": 0.7236041638502095, + "grad_norm": 0.27473166584968567, + "learning_rate": 5.671995733641769e-06, + "loss": 0.0494384765625, + "step": 10705 + }, + { + "epoch": 0.7236717588211438, + "grad_norm": 0.926188588142395, + "learning_rate": 5.669412021666021e-06, + "loss": 0.15720367431640625, + "step": 10706 + }, + { + "epoch": 0.7237393537920779, + "grad_norm": 0.2907699644565582, + "learning_rate": 5.666828761162136e-06, + "loss": 0.05108642578125, + "step": 10707 + }, + { + "epoch": 0.723806948763012, + "grad_norm": 0.35874542593955994, + "learning_rate": 5.664245952255125e-06, + "loss": 0.0592041015625, + "step": 10708 + }, + { + "epoch": 0.7238745437339462, + "grad_norm": 0.464478462934494, + "learning_rate": 5.661663595069952e-06, + "loss": 0.09283447265625, + "step": 10709 + }, + { + "epoch": 0.7239421387048803, + "grad_norm": 0.35365861654281616, + "learning_rate": 5.6590816897315685e-06, + "loss": 0.061065673828125, + "step": 10710 + }, + { + "epoch": 0.7240097336758146, + "grad_norm": 0.8430835008621216, + "learning_rate": 5.656500236364905e-06, + "loss": 0.1179962158203125, + "step": 10711 + }, + { + "epoch": 0.7240773286467487, + "grad_norm": 0.3274070620536804, + "learning_rate": 5.653919235094865e-06, + "loss": 0.04561614990234375, + "step": 10712 + }, + { + "epoch": 0.7241449236176828, + "grad_norm": 0.6798168420791626, + "learning_rate": 5.651338686046329e-06, + "loss": 0.1059417724609375, + "step": 10713 + }, + { + "epoch": 0.724212518588617, + "grad_norm": 0.6501064300537109, + "learning_rate": 5.648758589344179e-06, + "loss": 0.10257720947265625, + "step": 10714 + }, + { + "epoch": 0.7242801135595511, + "grad_norm": 1.552046537399292, + "learning_rate": 5.646178945113232e-06, + "loss": 0.108154296875, + "step": 10715 + }, + { + "epoch": 0.7243477085304854, + "grad_norm": 0.8112305998802185, + "learning_rate": 5.643599753478322e-06, + "loss": 0.1464996337890625, + "step": 10716 + }, + { + "epoch": 0.7244153035014195, + "grad_norm": 0.6890695095062256, + "learning_rate": 5.6410210145642485e-06, + "loss": 0.122467041015625, + "step": 10717 + }, + { + "epoch": 0.7244828984723537, + "grad_norm": 0.691204309463501, + "learning_rate": 5.638442728495773e-06, + "loss": 0.1315765380859375, + "step": 10718 + }, + { + "epoch": 0.7245504934432878, + "grad_norm": 0.4523400068283081, + "learning_rate": 5.635864895397665e-06, + "loss": 0.0725555419921875, + "step": 10719 + }, + { + "epoch": 0.7246180884142219, + "grad_norm": 0.6859564185142517, + "learning_rate": 5.633287515394638e-06, + "loss": 0.139678955078125, + "step": 10720 + }, + { + "epoch": 0.7246856833851562, + "grad_norm": 0.4290578067302704, + "learning_rate": 5.630710588611419e-06, + "loss": 0.08936309814453125, + "step": 10721 + }, + { + "epoch": 0.7247532783560903, + "grad_norm": 1.0914125442504883, + "learning_rate": 5.628134115172687e-06, + "loss": 0.187408447265625, + "step": 10722 + }, + { + "epoch": 0.7248208733270245, + "grad_norm": 0.2952689230442047, + "learning_rate": 5.625558095203112e-06, + "loss": 0.051662445068359375, + "step": 10723 + }, + { + "epoch": 0.7248884682979586, + "grad_norm": 0.9159207344055176, + "learning_rate": 5.622982528827334e-06, + "loss": 0.17218017578125, + "step": 10724 + }, + { + "epoch": 0.7249560632688928, + "grad_norm": 1.0329508781433105, + "learning_rate": 5.620407416169975e-06, + "loss": 0.121307373046875, + "step": 10725 + }, + { + "epoch": 0.725023658239827, + "grad_norm": 0.572704017162323, + "learning_rate": 5.617832757355633e-06, + "loss": 0.09906768798828125, + "step": 10726 + }, + { + "epoch": 0.7250912532107611, + "grad_norm": 0.4519895315170288, + "learning_rate": 5.615258552508897e-06, + "loss": 0.07192230224609375, + "step": 10727 + }, + { + "epoch": 0.7251588481816953, + "grad_norm": 0.2956032156944275, + "learning_rate": 5.612684801754312e-06, + "loss": 0.03333282470703125, + "step": 10728 + }, + { + "epoch": 0.7252264431526294, + "grad_norm": 0.41381341218948364, + "learning_rate": 5.6101115052164106e-06, + "loss": 0.0670166015625, + "step": 10729 + }, + { + "epoch": 0.7252940381235636, + "grad_norm": 0.3189420998096466, + "learning_rate": 5.607538663019719e-06, + "loss": 0.049602508544921875, + "step": 10730 + }, + { + "epoch": 0.7253616330944977, + "grad_norm": 0.8335511684417725, + "learning_rate": 5.604966275288709e-06, + "loss": 0.133575439453125, + "step": 10731 + }, + { + "epoch": 0.725429228065432, + "grad_norm": 0.5448535084724426, + "learning_rate": 5.602394342147862e-06, + "loss": 0.09989166259765625, + "step": 10732 + }, + { + "epoch": 0.7254968230363661, + "grad_norm": 1.5636370182037354, + "learning_rate": 5.599822863721619e-06, + "loss": 0.21771240234375, + "step": 10733 + }, + { + "epoch": 0.7255644180073002, + "grad_norm": 1.8986544609069824, + "learning_rate": 5.597251840134407e-06, + "loss": 0.15206146240234375, + "step": 10734 + }, + { + "epoch": 0.7256320129782344, + "grad_norm": 0.7531210780143738, + "learning_rate": 5.594681271510624e-06, + "loss": 0.09566020965576172, + "step": 10735 + }, + { + "epoch": 0.7256996079491685, + "grad_norm": 1.7268210649490356, + "learning_rate": 5.592111157974652e-06, + "loss": 0.29150390625, + "step": 10736 + }, + { + "epoch": 0.7257672029201028, + "grad_norm": 0.4199133813381195, + "learning_rate": 5.589541499650849e-06, + "loss": 0.06440162658691406, + "step": 10737 + }, + { + "epoch": 0.7258347978910369, + "grad_norm": 0.8197304606437683, + "learning_rate": 5.586972296663552e-06, + "loss": 0.1286163330078125, + "step": 10738 + }, + { + "epoch": 0.7259023928619711, + "grad_norm": 0.8276000618934631, + "learning_rate": 5.5844035491370714e-06, + "loss": 0.1668701171875, + "step": 10739 + }, + { + "epoch": 0.7259699878329052, + "grad_norm": 0.6796982288360596, + "learning_rate": 5.5818352571957095e-06, + "loss": 0.11977386474609375, + "step": 10740 + }, + { + "epoch": 0.7260375828038393, + "grad_norm": 0.7541150450706482, + "learning_rate": 5.579267420963725e-06, + "loss": 0.11560249328613281, + "step": 10741 + }, + { + "epoch": 0.7261051777747736, + "grad_norm": 0.55162113904953, + "learning_rate": 5.5767000405653645e-06, + "loss": 0.1157989501953125, + "step": 10742 + }, + { + "epoch": 0.7261727727457077, + "grad_norm": 0.628913402557373, + "learning_rate": 5.574133116124869e-06, + "loss": 0.0974884033203125, + "step": 10743 + }, + { + "epoch": 0.7262403677166419, + "grad_norm": 1.0862388610839844, + "learning_rate": 5.57156664776642e-06, + "loss": 0.181488037109375, + "step": 10744 + }, + { + "epoch": 0.726307962687576, + "grad_norm": 1.0794316530227661, + "learning_rate": 5.569000635614219e-06, + "loss": 0.208740234375, + "step": 10745 + }, + { + "epoch": 0.7263755576585103, + "grad_norm": 1.2442210912704468, + "learning_rate": 5.566435079792417e-06, + "loss": 0.14803695678710938, + "step": 10746 + }, + { + "epoch": 0.7264431526294444, + "grad_norm": 0.2441357523202896, + "learning_rate": 5.563869980425152e-06, + "loss": 0.050323486328125, + "step": 10747 + }, + { + "epoch": 0.7265107476003785, + "grad_norm": 0.3937353193759918, + "learning_rate": 5.561305337636542e-06, + "loss": 0.06993865966796875, + "step": 10748 + }, + { + "epoch": 0.7265783425713127, + "grad_norm": 1.0437512397766113, + "learning_rate": 5.558741151550676e-06, + "loss": 0.11215591430664062, + "step": 10749 + }, + { + "epoch": 0.7266459375422468, + "grad_norm": 0.24201858043670654, + "learning_rate": 5.556177422291625e-06, + "loss": 0.037689208984375, + "step": 10750 + }, + { + "epoch": 0.726713532513181, + "grad_norm": 1.3575758934020996, + "learning_rate": 5.5536141499834485e-06, + "loss": 0.182281494140625, + "step": 10751 + }, + { + "epoch": 0.7267811274841152, + "grad_norm": 0.7522835731506348, + "learning_rate": 5.551051334750162e-06, + "loss": 0.14997100830078125, + "step": 10752 + }, + { + "epoch": 0.7268487224550494, + "grad_norm": 0.34825897216796875, + "learning_rate": 5.5484889767157735e-06, + "loss": 0.044864654541015625, + "step": 10753 + }, + { + "epoch": 0.7269163174259835, + "grad_norm": 0.6531705856323242, + "learning_rate": 5.545927076004268e-06, + "loss": 0.1143035888671875, + "step": 10754 + }, + { + "epoch": 0.7269839123969176, + "grad_norm": 0.308994323015213, + "learning_rate": 5.5433656327395995e-06, + "loss": 0.0549468994140625, + "step": 10755 + }, + { + "epoch": 0.7270515073678518, + "grad_norm": 0.4758914113044739, + "learning_rate": 5.540804647045721e-06, + "loss": 0.037795066833496094, + "step": 10756 + }, + { + "epoch": 0.727119102338786, + "grad_norm": 0.5590283274650574, + "learning_rate": 5.53824411904653e-06, + "loss": 0.08892059326171875, + "step": 10757 + }, + { + "epoch": 0.7271866973097202, + "grad_norm": 0.6134048104286194, + "learning_rate": 5.535684048865936e-06, + "loss": 0.0902557373046875, + "step": 10758 + }, + { + "epoch": 0.7272542922806543, + "grad_norm": 0.6186431646347046, + "learning_rate": 5.533124436627805e-06, + "loss": 0.10761260986328125, + "step": 10759 + }, + { + "epoch": 0.7273218872515885, + "grad_norm": 0.17853626608848572, + "learning_rate": 5.5305652824559884e-06, + "loss": 0.029346466064453125, + "step": 10760 + }, + { + "epoch": 0.7273894822225226, + "grad_norm": 0.34515488147735596, + "learning_rate": 5.528006586474313e-06, + "loss": 0.07338428497314453, + "step": 10761 + }, + { + "epoch": 0.7274570771934568, + "grad_norm": 0.19843333959579468, + "learning_rate": 5.525448348806584e-06, + "loss": 0.028469085693359375, + "step": 10762 + }, + { + "epoch": 0.727524672164391, + "grad_norm": 0.7283393144607544, + "learning_rate": 5.522890569576587e-06, + "loss": 0.1307373046875, + "step": 10763 + }, + { + "epoch": 0.7275922671353251, + "grad_norm": 1.6516746282577515, + "learning_rate": 5.52033324890808e-06, + "loss": 0.235626220703125, + "step": 10764 + }, + { + "epoch": 0.7276598621062593, + "grad_norm": 1.5521005392074585, + "learning_rate": 5.517776386924805e-06, + "loss": 0.1690673828125, + "step": 10765 + }, + { + "epoch": 0.7277274570771934, + "grad_norm": 0.45347630977630615, + "learning_rate": 5.515219983750477e-06, + "loss": 0.076629638671875, + "step": 10766 + }, + { + "epoch": 0.7277950520481277, + "grad_norm": 0.395748108625412, + "learning_rate": 5.512664039508793e-06, + "loss": 0.05066680908203125, + "step": 10767 + }, + { + "epoch": 0.7278626470190618, + "grad_norm": 0.7248830199241638, + "learning_rate": 5.510108554323416e-06, + "loss": 0.09362220764160156, + "step": 10768 + }, + { + "epoch": 0.7279302419899959, + "grad_norm": 0.7661983370780945, + "learning_rate": 5.507553528318016e-06, + "loss": 0.1647796630859375, + "step": 10769 + }, + { + "epoch": 0.7279978369609301, + "grad_norm": 0.22582784295082092, + "learning_rate": 5.504998961616199e-06, + "loss": 0.04302978515625, + "step": 10770 + }, + { + "epoch": 0.7280654319318642, + "grad_norm": 0.9160343408584595, + "learning_rate": 5.502444854341586e-06, + "loss": 0.191070556640625, + "step": 10771 + }, + { + "epoch": 0.7281330269027985, + "grad_norm": 0.29280582070350647, + "learning_rate": 5.499891206617756e-06, + "loss": 0.033527374267578125, + "step": 10772 + }, + { + "epoch": 0.7282006218737326, + "grad_norm": 0.31931281089782715, + "learning_rate": 5.49733801856827e-06, + "loss": 0.054256439208984375, + "step": 10773 + }, + { + "epoch": 0.7282682168446668, + "grad_norm": 0.2927650511264801, + "learning_rate": 5.494785290316665e-06, + "loss": 0.04175567626953125, + "step": 10774 + }, + { + "epoch": 0.7283358118156009, + "grad_norm": 0.9061463475227356, + "learning_rate": 5.492233021986461e-06, + "loss": 0.135589599609375, + "step": 10775 + }, + { + "epoch": 0.728403406786535, + "grad_norm": 0.3083031475543976, + "learning_rate": 5.489681213701153e-06, + "loss": 0.0391387939453125, + "step": 10776 + }, + { + "epoch": 0.7284710017574693, + "grad_norm": 1.2210856676101685, + "learning_rate": 5.487129865584212e-06, + "loss": 0.178619384765625, + "step": 10777 + }, + { + "epoch": 0.7285385967284034, + "grad_norm": 0.8028357625007629, + "learning_rate": 5.484578977759087e-06, + "loss": 0.128936767578125, + "step": 10778 + }, + { + "epoch": 0.7286061916993376, + "grad_norm": 0.2146526575088501, + "learning_rate": 5.482028550349201e-06, + "loss": 0.037750244140625, + "step": 10779 + }, + { + "epoch": 0.7286737866702717, + "grad_norm": 0.6725479960441589, + "learning_rate": 5.479478583477977e-06, + "loss": 0.12960052490234375, + "step": 10780 + }, + { + "epoch": 0.7287413816412059, + "grad_norm": 0.3543128967285156, + "learning_rate": 5.476929077268775e-06, + "loss": 0.05579376220703125, + "step": 10781 + }, + { + "epoch": 0.7288089766121401, + "grad_norm": 0.584054708480835, + "learning_rate": 5.474380031844979e-06, + "loss": 0.09675979614257812, + "step": 10782 + }, + { + "epoch": 0.7288765715830742, + "grad_norm": 0.933061420917511, + "learning_rate": 5.471831447329906e-06, + "loss": 0.143402099609375, + "step": 10783 + }, + { + "epoch": 0.7289441665540084, + "grad_norm": 0.5385454893112183, + "learning_rate": 5.469283323846887e-06, + "loss": 0.0927276611328125, + "step": 10784 + }, + { + "epoch": 0.7290117615249425, + "grad_norm": 1.286931037902832, + "learning_rate": 5.466735661519212e-06, + "loss": 0.22003173828125, + "step": 10785 + }, + { + "epoch": 0.7290793564958767, + "grad_norm": 0.5037186145782471, + "learning_rate": 5.464188460470153e-06, + "loss": 0.076507568359375, + "step": 10786 + }, + { + "epoch": 0.7291469514668109, + "grad_norm": 0.3636317551136017, + "learning_rate": 5.461641720822957e-06, + "loss": 0.0710601806640625, + "step": 10787 + }, + { + "epoch": 0.7292145464377451, + "grad_norm": 0.3368382453918457, + "learning_rate": 5.459095442700855e-06, + "loss": 0.0625457763671875, + "step": 10788 + }, + { + "epoch": 0.7292821414086792, + "grad_norm": 0.44629669189453125, + "learning_rate": 5.4565496262270485e-06, + "loss": 0.09433746337890625, + "step": 10789 + }, + { + "epoch": 0.7293497363796133, + "grad_norm": 0.6725805997848511, + "learning_rate": 5.454004271524721e-06, + "loss": 0.08722686767578125, + "step": 10790 + }, + { + "epoch": 0.7294173313505475, + "grad_norm": 0.47359180450439453, + "learning_rate": 5.451459378717034e-06, + "loss": 0.0680084228515625, + "step": 10791 + }, + { + "epoch": 0.7294849263214817, + "grad_norm": 0.7412232160568237, + "learning_rate": 5.448914947927118e-06, + "loss": 0.10401153564453125, + "step": 10792 + }, + { + "epoch": 0.7295525212924159, + "grad_norm": 0.6291791796684265, + "learning_rate": 5.446370979278105e-06, + "loss": 0.1031646728515625, + "step": 10793 + }, + { + "epoch": 0.72962011626335, + "grad_norm": 0.38962623476982117, + "learning_rate": 5.4438274728930675e-06, + "loss": 0.0554046630859375, + "step": 10794 + }, + { + "epoch": 0.7296877112342842, + "grad_norm": 0.7110332250595093, + "learning_rate": 5.44128442889509e-06, + "loss": 0.117584228515625, + "step": 10795 + }, + { + "epoch": 0.7297553062052183, + "grad_norm": 1.2561180591583252, + "learning_rate": 5.438741847407218e-06, + "loss": 0.189697265625, + "step": 10796 + }, + { + "epoch": 0.7298229011761524, + "grad_norm": 1.5772961378097534, + "learning_rate": 5.436199728552476e-06, + "loss": 0.241455078125, + "step": 10797 + }, + { + "epoch": 0.7298904961470867, + "grad_norm": 0.722108006477356, + "learning_rate": 5.433658072453873e-06, + "loss": 0.11240768432617188, + "step": 10798 + }, + { + "epoch": 0.7299580911180208, + "grad_norm": 0.8596794009208679, + "learning_rate": 5.431116879234374e-06, + "loss": 0.1258087158203125, + "step": 10799 + }, + { + "epoch": 0.730025686088955, + "grad_norm": 1.0643478631973267, + "learning_rate": 5.428576149016954e-06, + "loss": 0.14986419677734375, + "step": 10800 + }, + { + "epoch": 0.7300932810598891, + "grad_norm": 0.504538893699646, + "learning_rate": 5.426035881924545e-06, + "loss": 0.0695953369140625, + "step": 10801 + }, + { + "epoch": 0.7301608760308234, + "grad_norm": 1.105059266090393, + "learning_rate": 5.423496078080058e-06, + "loss": 0.1434326171875, + "step": 10802 + }, + { + "epoch": 0.7302284710017575, + "grad_norm": 1.613831877708435, + "learning_rate": 5.420956737606389e-06, + "loss": 0.11553382873535156, + "step": 10803 + }, + { + "epoch": 0.7302960659726916, + "grad_norm": 0.19641254842281342, + "learning_rate": 5.418417860626402e-06, + "loss": 0.031429290771484375, + "step": 10804 + }, + { + "epoch": 0.7303636609436258, + "grad_norm": 0.9611285328865051, + "learning_rate": 5.415879447262942e-06, + "loss": 0.14244842529296875, + "step": 10805 + }, + { + "epoch": 0.7304312559145599, + "grad_norm": 0.37842321395874023, + "learning_rate": 5.413341497638847e-06, + "loss": 0.07032203674316406, + "step": 10806 + }, + { + "epoch": 0.7304988508854942, + "grad_norm": 0.8538565039634705, + "learning_rate": 5.4108040118768995e-06, + "loss": 0.192626953125, + "step": 10807 + }, + { + "epoch": 0.7305664458564283, + "grad_norm": 1.585559606552124, + "learning_rate": 5.4082669900998926e-06, + "loss": 0.214202880859375, + "step": 10808 + }, + { + "epoch": 0.7306340408273625, + "grad_norm": 0.49020230770111084, + "learning_rate": 5.405730432430581e-06, + "loss": 0.07275009155273438, + "step": 10809 + }, + { + "epoch": 0.7307016357982966, + "grad_norm": 0.33202874660491943, + "learning_rate": 5.403194338991696e-06, + "loss": 0.04299163818359375, + "step": 10810 + }, + { + "epoch": 0.7307692307692307, + "grad_norm": 0.21940180659294128, + "learning_rate": 5.400658709905956e-06, + "loss": 0.03035736083984375, + "step": 10811 + }, + { + "epoch": 0.730836825740165, + "grad_norm": 1.125450611114502, + "learning_rate": 5.398123545296034e-06, + "loss": 0.210479736328125, + "step": 10812 + }, + { + "epoch": 0.7309044207110991, + "grad_norm": 0.28903746604919434, + "learning_rate": 5.395588845284615e-06, + "loss": 0.04294586181640625, + "step": 10813 + }, + { + "epoch": 0.7309720156820333, + "grad_norm": 0.4539715051651001, + "learning_rate": 5.3930546099943354e-06, + "loss": 0.08700942993164062, + "step": 10814 + }, + { + "epoch": 0.7310396106529674, + "grad_norm": 0.48911353945732117, + "learning_rate": 5.3905208395478205e-06, + "loss": 0.07167434692382812, + "step": 10815 + }, + { + "epoch": 0.7311072056239016, + "grad_norm": 0.5826651453971863, + "learning_rate": 5.387987534067667e-06, + "loss": 0.0689239501953125, + "step": 10816 + }, + { + "epoch": 0.7311748005948357, + "grad_norm": 1.2507439851760864, + "learning_rate": 5.3854546936764534e-06, + "loss": 0.139068603515625, + "step": 10817 + }, + { + "epoch": 0.7312423955657699, + "grad_norm": 1.3463364839553833, + "learning_rate": 5.3829223184967285e-06, + "loss": 0.16986846923828125, + "step": 10818 + }, + { + "epoch": 0.7313099905367041, + "grad_norm": 0.6483510136604309, + "learning_rate": 5.38039040865104e-06, + "loss": 0.120758056640625, + "step": 10819 + }, + { + "epoch": 0.7313775855076382, + "grad_norm": 0.5251507759094238, + "learning_rate": 5.377858964261878e-06, + "loss": 0.060245513916015625, + "step": 10820 + }, + { + "epoch": 0.7314451804785724, + "grad_norm": 0.255796879529953, + "learning_rate": 5.375327985451742e-06, + "loss": 0.042591094970703125, + "step": 10821 + }, + { + "epoch": 0.7315127754495065, + "grad_norm": 0.9680492281913757, + "learning_rate": 5.3727974723431e-06, + "loss": 0.161102294921875, + "step": 10822 + }, + { + "epoch": 0.7315803704204408, + "grad_norm": 0.4720579981803894, + "learning_rate": 5.370267425058376e-06, + "loss": 0.07289886474609375, + "step": 10823 + }, + { + "epoch": 0.7316479653913749, + "grad_norm": 0.9964560270309448, + "learning_rate": 5.3677378437200056e-06, + "loss": 0.1566925048828125, + "step": 10824 + }, + { + "epoch": 0.731715560362309, + "grad_norm": 0.6014738082885742, + "learning_rate": 5.365208728450383e-06, + "loss": 0.125518798828125, + "step": 10825 + }, + { + "epoch": 0.7317831553332432, + "grad_norm": 1.2586591243743896, + "learning_rate": 5.3626800793718786e-06, + "loss": 0.17744827270507812, + "step": 10826 + }, + { + "epoch": 0.7318507503041773, + "grad_norm": 0.36097216606140137, + "learning_rate": 5.360151896606845e-06, + "loss": 0.0723876953125, + "step": 10827 + }, + { + "epoch": 0.7319183452751116, + "grad_norm": 0.5872529745101929, + "learning_rate": 5.3576241802776136e-06, + "loss": 0.11484527587890625, + "step": 10828 + }, + { + "epoch": 0.7319859402460457, + "grad_norm": 0.352763295173645, + "learning_rate": 5.355096930506488e-06, + "loss": 0.07568359375, + "step": 10829 + }, + { + "epoch": 0.7320535352169799, + "grad_norm": 0.8375537395477295, + "learning_rate": 5.352570147415756e-06, + "loss": 0.11063671112060547, + "step": 10830 + }, + { + "epoch": 0.732121130187914, + "grad_norm": 0.6059942841529846, + "learning_rate": 5.3500438311276715e-06, + "loss": 0.0785369873046875, + "step": 10831 + }, + { + "epoch": 0.7321887251588481, + "grad_norm": 0.33863064646720886, + "learning_rate": 5.347517981764486e-06, + "loss": 0.06655120849609375, + "step": 10832 + }, + { + "epoch": 0.7322563201297824, + "grad_norm": 0.2964646518230438, + "learning_rate": 5.344992599448408e-06, + "loss": 0.0482940673828125, + "step": 10833 + }, + { + "epoch": 0.7323239151007165, + "grad_norm": 1.1544185876846313, + "learning_rate": 5.342467684301624e-06, + "loss": 0.190643310546875, + "step": 10834 + }, + { + "epoch": 0.7323915100716507, + "grad_norm": 0.1956651359796524, + "learning_rate": 5.339943236446324e-06, + "loss": 0.03504180908203125, + "step": 10835 + }, + { + "epoch": 0.7324591050425848, + "grad_norm": 0.35510826110839844, + "learning_rate": 5.337419256004635e-06, + "loss": 0.07166290283203125, + "step": 10836 + }, + { + "epoch": 0.7325267000135189, + "grad_norm": 0.7786940932273865, + "learning_rate": 5.334895743098699e-06, + "loss": 0.19769287109375, + "step": 10837 + }, + { + "epoch": 0.7325942949844532, + "grad_norm": 0.5867927670478821, + "learning_rate": 5.332372697850614e-06, + "loss": 0.10638427734375, + "step": 10838 + }, + { + "epoch": 0.7326618899553873, + "grad_norm": 0.37633419036865234, + "learning_rate": 5.329850120382459e-06, + "loss": 0.0649261474609375, + "step": 10839 + }, + { + "epoch": 0.7327294849263215, + "grad_norm": 0.3531416058540344, + "learning_rate": 5.327328010816293e-06, + "loss": 0.05994224548339844, + "step": 10840 + }, + { + "epoch": 0.7327970798972556, + "grad_norm": 0.28248322010040283, + "learning_rate": 5.324806369274153e-06, + "loss": 0.051158905029296875, + "step": 10841 + }, + { + "epoch": 0.7328646748681898, + "grad_norm": 0.9284993410110474, + "learning_rate": 5.322285195878044e-06, + "loss": 0.160736083984375, + "step": 10842 + }, + { + "epoch": 0.732932269839124, + "grad_norm": 0.3888358771800995, + "learning_rate": 5.319764490749972e-06, + "loss": 0.07916259765625, + "step": 10843 + }, + { + "epoch": 0.7329998648100581, + "grad_norm": 0.7880619764328003, + "learning_rate": 5.317244254011885e-06, + "loss": 0.1309814453125, + "step": 10844 + }, + { + "epoch": 0.7330674597809923, + "grad_norm": 0.23711571097373962, + "learning_rate": 5.31472448578575e-06, + "loss": 0.03699207305908203, + "step": 10845 + }, + { + "epoch": 0.7331350547519264, + "grad_norm": 1.3089097738265991, + "learning_rate": 5.312205186193469e-06, + "loss": 0.15888214111328125, + "step": 10846 + }, + { + "epoch": 0.7332026497228606, + "grad_norm": 0.29951050877571106, + "learning_rate": 5.309686355356942e-06, + "loss": 0.04126739501953125, + "step": 10847 + }, + { + "epoch": 0.7332702446937948, + "grad_norm": 0.7525660395622253, + "learning_rate": 5.307167993398066e-06, + "loss": 0.1482086181640625, + "step": 10848 + }, + { + "epoch": 0.733337839664729, + "grad_norm": 0.3995170295238495, + "learning_rate": 5.304650100438671e-06, + "loss": 0.08388519287109375, + "step": 10849 + }, + { + "epoch": 0.7334054346356631, + "grad_norm": 0.8220345973968506, + "learning_rate": 5.302132676600601e-06, + "loss": 0.1324005126953125, + "step": 10850 + }, + { + "epoch": 0.7334730296065972, + "grad_norm": 0.4465561509132385, + "learning_rate": 5.299615722005666e-06, + "loss": 0.08216094970703125, + "step": 10851 + }, + { + "epoch": 0.7335406245775314, + "grad_norm": 0.41745424270629883, + "learning_rate": 5.2970992367756465e-06, + "loss": 0.0429840087890625, + "step": 10852 + }, + { + "epoch": 0.7336082195484656, + "grad_norm": 1.406139850616455, + "learning_rate": 5.2945832210323085e-06, + "loss": 0.161285400390625, + "step": 10853 + }, + { + "epoch": 0.7336758145193998, + "grad_norm": 0.521407425403595, + "learning_rate": 5.2920676748973916e-06, + "loss": 0.07590675354003906, + "step": 10854 + }, + { + "epoch": 0.7337434094903339, + "grad_norm": 0.32857534289360046, + "learning_rate": 5.289552598492607e-06, + "loss": 0.0519866943359375, + "step": 10855 + }, + { + "epoch": 0.7338110044612681, + "grad_norm": 1.138210654258728, + "learning_rate": 5.2870379919396685e-06, + "loss": 0.129669189453125, + "step": 10856 + }, + { + "epoch": 0.7338785994322022, + "grad_norm": 0.9857907295227051, + "learning_rate": 5.28452385536023e-06, + "loss": 0.165740966796875, + "step": 10857 + }, + { + "epoch": 0.7339461944031364, + "grad_norm": 0.7890100479125977, + "learning_rate": 5.282010188875948e-06, + "loss": 0.14072418212890625, + "step": 10858 + }, + { + "epoch": 0.7340137893740706, + "grad_norm": 0.3960345983505249, + "learning_rate": 5.279496992608448e-06, + "loss": 0.0718841552734375, + "step": 10859 + }, + { + "epoch": 0.7340813843450047, + "grad_norm": 0.33163806796073914, + "learning_rate": 5.276984266679329e-06, + "loss": 0.0533599853515625, + "step": 10860 + }, + { + "epoch": 0.7341489793159389, + "grad_norm": 0.9248868227005005, + "learning_rate": 5.27447201121019e-06, + "loss": 0.12258720397949219, + "step": 10861 + }, + { + "epoch": 0.734216574286873, + "grad_norm": 1.1464346647262573, + "learning_rate": 5.271960226322564e-06, + "loss": 0.14465713500976562, + "step": 10862 + }, + { + "epoch": 0.7342841692578073, + "grad_norm": 1.1364179849624634, + "learning_rate": 5.269448912138008e-06, + "loss": 0.207489013671875, + "step": 10863 + }, + { + "epoch": 0.7343517642287414, + "grad_norm": 0.414960116147995, + "learning_rate": 5.266938068778027e-06, + "loss": 0.08231353759765625, + "step": 10864 + }, + { + "epoch": 0.7344193591996755, + "grad_norm": 0.8316663503646851, + "learning_rate": 5.264427696364111e-06, + "loss": 0.12743377685546875, + "step": 10865 + }, + { + "epoch": 0.7344869541706097, + "grad_norm": 1.5869745016098022, + "learning_rate": 5.261917795017729e-06, + "loss": 0.20135498046875, + "step": 10866 + }, + { + "epoch": 0.7345545491415438, + "grad_norm": 0.5027121305465698, + "learning_rate": 5.259408364860324e-06, + "loss": 0.1169586181640625, + "step": 10867 + }, + { + "epoch": 0.7346221441124781, + "grad_norm": 0.8644429445266724, + "learning_rate": 5.256899406013312e-06, + "loss": 0.13219451904296875, + "step": 10868 + }, + { + "epoch": 0.7346897390834122, + "grad_norm": 0.6592795848846436, + "learning_rate": 5.25439091859811e-06, + "loss": 0.14595794677734375, + "step": 10869 + }, + { + "epoch": 0.7347573340543464, + "grad_norm": 0.5810836553573608, + "learning_rate": 5.251882902736077e-06, + "loss": 0.09970855712890625, + "step": 10870 + }, + { + "epoch": 0.7348249290252805, + "grad_norm": 1.0716099739074707, + "learning_rate": 5.249375358548568e-06, + "loss": 0.160125732421875, + "step": 10871 + }, + { + "epoch": 0.7348925239962146, + "grad_norm": 1.1805787086486816, + "learning_rate": 5.2468682861569265e-06, + "loss": 0.1736907958984375, + "step": 10872 + }, + { + "epoch": 0.7349601189671489, + "grad_norm": 0.23494496941566467, + "learning_rate": 5.244361685682441e-06, + "loss": 0.02756500244140625, + "step": 10873 + }, + { + "epoch": 0.735027713938083, + "grad_norm": 0.9036230444908142, + "learning_rate": 5.241855557246417e-06, + "loss": 0.1377716064453125, + "step": 10874 + }, + { + "epoch": 0.7350953089090172, + "grad_norm": 0.3598436415195465, + "learning_rate": 5.239349900970099e-06, + "loss": 0.06153106689453125, + "step": 10875 + }, + { + "epoch": 0.7351629038799513, + "grad_norm": 0.8326475024223328, + "learning_rate": 5.236844716974736e-06, + "loss": 0.17132568359375, + "step": 10876 + }, + { + "epoch": 0.7352304988508855, + "grad_norm": 2.0831778049468994, + "learning_rate": 5.234340005381543e-06, + "loss": 0.18254852294921875, + "step": 10877 + }, + { + "epoch": 0.7352980938218197, + "grad_norm": 0.8083944916725159, + "learning_rate": 5.2318357663117115e-06, + "loss": 0.1579742431640625, + "step": 10878 + }, + { + "epoch": 0.7353656887927538, + "grad_norm": 0.858629047870636, + "learning_rate": 5.229331999886415e-06, + "loss": 0.12821197509765625, + "step": 10879 + }, + { + "epoch": 0.735433283763688, + "grad_norm": 0.35219284892082214, + "learning_rate": 5.226828706226797e-06, + "loss": 0.06040191650390625, + "step": 10880 + }, + { + "epoch": 0.7355008787346221, + "grad_norm": 0.5035409927368164, + "learning_rate": 5.224325885453987e-06, + "loss": 0.087188720703125, + "step": 10881 + }, + { + "epoch": 0.7355684737055563, + "grad_norm": 0.7543391585350037, + "learning_rate": 5.221823537689084e-06, + "loss": 0.11427879333496094, + "step": 10882 + }, + { + "epoch": 0.7356360686764905, + "grad_norm": 0.4722796678543091, + "learning_rate": 5.219321663053168e-06, + "loss": 0.0930633544921875, + "step": 10883 + }, + { + "epoch": 0.7357036636474247, + "grad_norm": 0.42341741919517517, + "learning_rate": 5.216820261667291e-06, + "loss": 0.062225341796875, + "step": 10884 + }, + { + "epoch": 0.7357712586183588, + "grad_norm": 1.5962098836898804, + "learning_rate": 5.2143193336525e-06, + "loss": 0.2689208984375, + "step": 10885 + }, + { + "epoch": 0.7358388535892929, + "grad_norm": 0.5359484553337097, + "learning_rate": 5.211818879129785e-06, + "loss": 0.07808685302734375, + "step": 10886 + }, + { + "epoch": 0.7359064485602271, + "grad_norm": 0.9146438837051392, + "learning_rate": 5.2093188982201566e-06, + "loss": 0.14385986328125, + "step": 10887 + }, + { + "epoch": 0.7359740435311612, + "grad_norm": 0.562109112739563, + "learning_rate": 5.206819391044557e-06, + "loss": 0.0892333984375, + "step": 10888 + }, + { + "epoch": 0.7360416385020955, + "grad_norm": 0.3432377576828003, + "learning_rate": 5.204320357723943e-06, + "loss": 0.06162261962890625, + "step": 10889 + }, + { + "epoch": 0.7361092334730296, + "grad_norm": 0.2930159270763397, + "learning_rate": 5.201821798379231e-06, + "loss": 0.052837371826171875, + "step": 10890 + }, + { + "epoch": 0.7361768284439638, + "grad_norm": 0.39938557147979736, + "learning_rate": 5.199323713131311e-06, + "loss": 0.08625030517578125, + "step": 10891 + }, + { + "epoch": 0.7362444234148979, + "grad_norm": 0.4345480799674988, + "learning_rate": 5.196826102101061e-06, + "loss": 0.06561279296875, + "step": 10892 + }, + { + "epoch": 0.736312018385832, + "grad_norm": 0.40353924036026, + "learning_rate": 5.194328965409331e-06, + "loss": 0.076751708984375, + "step": 10893 + }, + { + "epoch": 0.7363796133567663, + "grad_norm": 0.8094905018806458, + "learning_rate": 5.191832303176945e-06, + "loss": 0.15521240234375, + "step": 10894 + }, + { + "epoch": 0.7364472083277004, + "grad_norm": 0.45959722995758057, + "learning_rate": 5.1893361155247075e-06, + "loss": 0.061367034912109375, + "step": 10895 + }, + { + "epoch": 0.7365148032986346, + "grad_norm": 0.4969048798084259, + "learning_rate": 5.186840402573402e-06, + "loss": 0.06775665283203125, + "step": 10896 + }, + { + "epoch": 0.7365823982695687, + "grad_norm": 0.3837975859642029, + "learning_rate": 5.1843451644437805e-06, + "loss": 0.060207366943359375, + "step": 10897 + }, + { + "epoch": 0.736649993240503, + "grad_norm": 0.6720172166824341, + "learning_rate": 5.181850401256593e-06, + "loss": 0.11112213134765625, + "step": 10898 + }, + { + "epoch": 0.7367175882114371, + "grad_norm": 0.4129827320575714, + "learning_rate": 5.1793561131325304e-06, + "loss": 0.0749359130859375, + "step": 10899 + }, + { + "epoch": 0.7367851831823712, + "grad_norm": 0.8117125630378723, + "learning_rate": 5.176862300192298e-06, + "loss": 0.12664794921875, + "step": 10900 + }, + { + "epoch": 0.7368527781533054, + "grad_norm": 0.6973400712013245, + "learning_rate": 5.174368962556557e-06, + "loss": 0.12708282470703125, + "step": 10901 + }, + { + "epoch": 0.7369203731242395, + "grad_norm": 0.31081387400627136, + "learning_rate": 5.171876100345952e-06, + "loss": 0.0597076416015625, + "step": 10902 + }, + { + "epoch": 0.7369879680951738, + "grad_norm": 0.41301003098487854, + "learning_rate": 5.1693837136811e-06, + "loss": 0.0848388671875, + "step": 10903 + }, + { + "epoch": 0.7370555630661079, + "grad_norm": 0.19041578471660614, + "learning_rate": 5.1668918026826e-06, + "loss": 0.037109375, + "step": 10904 + }, + { + "epoch": 0.7371231580370421, + "grad_norm": 0.4084036648273468, + "learning_rate": 5.164400367471027e-06, + "loss": 0.071502685546875, + "step": 10905 + }, + { + "epoch": 0.7371907530079762, + "grad_norm": 0.4510508179664612, + "learning_rate": 5.161909408166931e-06, + "loss": 0.08929443359375, + "step": 10906 + }, + { + "epoch": 0.7372583479789103, + "grad_norm": 0.8814932107925415, + "learning_rate": 5.159418924890839e-06, + "loss": 0.12911224365234375, + "step": 10907 + }, + { + "epoch": 0.7373259429498445, + "grad_norm": 0.49860355257987976, + "learning_rate": 5.156928917763258e-06, + "loss": 0.11246490478515625, + "step": 10908 + }, + { + "epoch": 0.7373935379207787, + "grad_norm": 0.27561625838279724, + "learning_rate": 5.154439386904671e-06, + "loss": 0.0397491455078125, + "step": 10909 + }, + { + "epoch": 0.7374611328917129, + "grad_norm": 0.5539155602455139, + "learning_rate": 5.151950332435528e-06, + "loss": 0.08787727355957031, + "step": 10910 + }, + { + "epoch": 0.737528727862647, + "grad_norm": 0.7179806232452393, + "learning_rate": 5.149461754476283e-06, + "loss": 0.12661361694335938, + "step": 10911 + }, + { + "epoch": 0.7375963228335812, + "grad_norm": 1.035571575164795, + "learning_rate": 5.146973653147328e-06, + "loss": 0.1771392822265625, + "step": 10912 + }, + { + "epoch": 0.7376639178045153, + "grad_norm": 0.309055358171463, + "learning_rate": 5.144486028569069e-06, + "loss": 0.05239105224609375, + "step": 10913 + }, + { + "epoch": 0.7377315127754495, + "grad_norm": 0.6769823431968689, + "learning_rate": 5.1419988808618664e-06, + "loss": 0.142486572265625, + "step": 10914 + }, + { + "epoch": 0.7377991077463837, + "grad_norm": 0.5773061513900757, + "learning_rate": 5.139512210146065e-06, + "loss": 0.10472679138183594, + "step": 10915 + }, + { + "epoch": 0.7378667027173178, + "grad_norm": 0.7662596702575684, + "learning_rate": 5.137026016541983e-06, + "loss": 0.1212921142578125, + "step": 10916 + }, + { + "epoch": 0.737934297688252, + "grad_norm": 0.29704514145851135, + "learning_rate": 5.134540300169919e-06, + "loss": 0.035541534423828125, + "step": 10917 + }, + { + "epoch": 0.7380018926591861, + "grad_norm": 0.3798324167728424, + "learning_rate": 5.132055061150149e-06, + "loss": 0.0525665283203125, + "step": 10918 + }, + { + "epoch": 0.7380694876301204, + "grad_norm": 0.44742193818092346, + "learning_rate": 5.129570299602924e-06, + "loss": 0.0716705322265625, + "step": 10919 + }, + { + "epoch": 0.7381370826010545, + "grad_norm": 0.6388757228851318, + "learning_rate": 5.127086015648471e-06, + "loss": 0.1428680419921875, + "step": 10920 + }, + { + "epoch": 0.7382046775719886, + "grad_norm": 0.45157140493392944, + "learning_rate": 5.124602209406993e-06, + "loss": 0.0789031982421875, + "step": 10921 + }, + { + "epoch": 0.7382722725429228, + "grad_norm": 0.9418371915817261, + "learning_rate": 5.122118880998678e-06, + "loss": 0.1478271484375, + "step": 10922 + }, + { + "epoch": 0.7383398675138569, + "grad_norm": 0.39824387431144714, + "learning_rate": 5.119636030543675e-06, + "loss": 0.0726318359375, + "step": 10923 + }, + { + "epoch": 0.7384074624847912, + "grad_norm": 0.46004846692085266, + "learning_rate": 5.117153658162135e-06, + "loss": 0.09665679931640625, + "step": 10924 + }, + { + "epoch": 0.7384750574557253, + "grad_norm": 1.2278807163238525, + "learning_rate": 5.114671763974154e-06, + "loss": 0.185546875, + "step": 10925 + }, + { + "epoch": 0.7385426524266595, + "grad_norm": 1.144328236579895, + "learning_rate": 5.112190348099834e-06, + "loss": 0.14036178588867188, + "step": 10926 + }, + { + "epoch": 0.7386102473975936, + "grad_norm": 0.7270378470420837, + "learning_rate": 5.10970941065924e-06, + "loss": 0.1237945556640625, + "step": 10927 + }, + { + "epoch": 0.7386778423685277, + "grad_norm": 0.5068045258522034, + "learning_rate": 5.107228951772403e-06, + "loss": 0.0875701904296875, + "step": 10928 + }, + { + "epoch": 0.738745437339462, + "grad_norm": 1.1631780862808228, + "learning_rate": 5.104748971559356e-06, + "loss": 0.15858078002929688, + "step": 10929 + }, + { + "epoch": 0.7388130323103961, + "grad_norm": 0.4071701765060425, + "learning_rate": 5.102269470140093e-06, + "loss": 0.0692596435546875, + "step": 10930 + }, + { + "epoch": 0.7388806272813303, + "grad_norm": 0.797999382019043, + "learning_rate": 5.099790447634585e-06, + "loss": 0.10527801513671875, + "step": 10931 + }, + { + "epoch": 0.7389482222522644, + "grad_norm": 0.8334342241287231, + "learning_rate": 5.097311904162784e-06, + "loss": 0.1644439697265625, + "step": 10932 + }, + { + "epoch": 0.7390158172231986, + "grad_norm": 1.124812126159668, + "learning_rate": 5.094833839844617e-06, + "loss": 0.19854736328125, + "step": 10933 + }, + { + "epoch": 0.7390834121941328, + "grad_norm": 0.35465008020401, + "learning_rate": 5.092356254799984e-06, + "loss": 0.08512115478515625, + "step": 10934 + }, + { + "epoch": 0.7391510071650669, + "grad_norm": 0.7868688106536865, + "learning_rate": 5.089879149148781e-06, + "loss": 0.133392333984375, + "step": 10935 + }, + { + "epoch": 0.7392186021360011, + "grad_norm": 0.9514992833137512, + "learning_rate": 5.087402523010844e-06, + "loss": 0.14580535888671875, + "step": 10936 + }, + { + "epoch": 0.7392861971069352, + "grad_norm": 0.9368302822113037, + "learning_rate": 5.08492637650603e-06, + "loss": 0.15611648559570312, + "step": 10937 + }, + { + "epoch": 0.7393537920778694, + "grad_norm": 0.7370114922523499, + "learning_rate": 5.0824507097541285e-06, + "loss": 0.13494873046875, + "step": 10938 + }, + { + "epoch": 0.7394213870488036, + "grad_norm": 0.565619945526123, + "learning_rate": 5.079975522874943e-06, + "loss": 0.09455490112304688, + "step": 10939 + }, + { + "epoch": 0.7394889820197378, + "grad_norm": 1.1043765544891357, + "learning_rate": 5.0775008159882395e-06, + "loss": 0.2034149169921875, + "step": 10940 + }, + { + "epoch": 0.7395565769906719, + "grad_norm": 0.20933304727077484, + "learning_rate": 5.0750265892137415e-06, + "loss": 0.03433990478515625, + "step": 10941 + }, + { + "epoch": 0.739624171961606, + "grad_norm": 0.40450674295425415, + "learning_rate": 5.0725528426711865e-06, + "loss": 0.055828094482421875, + "step": 10942 + }, + { + "epoch": 0.7396917669325402, + "grad_norm": 0.1583317667245865, + "learning_rate": 5.070079576480263e-06, + "loss": 0.028835296630859375, + "step": 10943 + }, + { + "epoch": 0.7397593619034744, + "grad_norm": 0.76679927110672, + "learning_rate": 5.067606790760645e-06, + "loss": 0.1328887939453125, + "step": 10944 + }, + { + "epoch": 0.7398269568744086, + "grad_norm": 0.3368452787399292, + "learning_rate": 5.065134485631976e-06, + "loss": 0.05861663818359375, + "step": 10945 + }, + { + "epoch": 0.7398945518453427, + "grad_norm": 0.7892310619354248, + "learning_rate": 5.0626626612138875e-06, + "loss": 0.153472900390625, + "step": 10946 + }, + { + "epoch": 0.7399621468162769, + "grad_norm": 1.0106256008148193, + "learning_rate": 5.060191317625973e-06, + "loss": 0.178070068359375, + "step": 10947 + }, + { + "epoch": 0.740029741787211, + "grad_norm": 0.41051068902015686, + "learning_rate": 5.057720454987826e-06, + "loss": 0.0861053466796875, + "step": 10948 + }, + { + "epoch": 0.7400973367581452, + "grad_norm": 0.6456829905509949, + "learning_rate": 5.055250073418986e-06, + "loss": 0.12253952026367188, + "step": 10949 + }, + { + "epoch": 0.7401649317290794, + "grad_norm": 0.3186339735984802, + "learning_rate": 5.052780173039003e-06, + "loss": 0.061248779296875, + "step": 10950 + }, + { + "epoch": 0.7402325267000135, + "grad_norm": 0.7894756197929382, + "learning_rate": 5.05031075396737e-06, + "loss": 0.135162353515625, + "step": 10951 + }, + { + "epoch": 0.7403001216709477, + "grad_norm": 0.339425653219223, + "learning_rate": 5.0478418163235745e-06, + "loss": 0.0608367919921875, + "step": 10952 + }, + { + "epoch": 0.7403677166418818, + "grad_norm": 0.5388440489768982, + "learning_rate": 5.045373360227094e-06, + "loss": 0.107147216796875, + "step": 10953 + }, + { + "epoch": 0.7404353116128161, + "grad_norm": 0.2505810260772705, + "learning_rate": 5.0429053857973485e-06, + "loss": 0.06182861328125, + "step": 10954 + }, + { + "epoch": 0.7405029065837502, + "grad_norm": 0.2577395439147949, + "learning_rate": 5.040437893153767e-06, + "loss": 0.033843994140625, + "step": 10955 + }, + { + "epoch": 0.7405705015546843, + "grad_norm": 0.888590395450592, + "learning_rate": 5.037970882415738e-06, + "loss": 0.14364242553710938, + "step": 10956 + }, + { + "epoch": 0.7406380965256185, + "grad_norm": 0.7327591180801392, + "learning_rate": 5.035504353702632e-06, + "loss": 0.07717514038085938, + "step": 10957 + }, + { + "epoch": 0.7407056914965526, + "grad_norm": 0.8464674949645996, + "learning_rate": 5.033038307133792e-06, + "loss": 0.1454315185546875, + "step": 10958 + }, + { + "epoch": 0.7407732864674869, + "grad_norm": 1.365553379058838, + "learning_rate": 5.030572742828545e-06, + "loss": 0.13138580322265625, + "step": 10959 + }, + { + "epoch": 0.740840881438421, + "grad_norm": 0.4612135589122772, + "learning_rate": 5.028107660906181e-06, + "loss": 0.0902557373046875, + "step": 10960 + }, + { + "epoch": 0.7409084764093552, + "grad_norm": 0.3743742108345032, + "learning_rate": 5.025643061485992e-06, + "loss": 0.0683441162109375, + "step": 10961 + }, + { + "epoch": 0.7409760713802893, + "grad_norm": 0.6832792162895203, + "learning_rate": 5.023178944687219e-06, + "loss": 0.1354217529296875, + "step": 10962 + }, + { + "epoch": 0.7410436663512234, + "grad_norm": 0.4256118834018707, + "learning_rate": 5.020715310629088e-06, + "loss": 0.07146453857421875, + "step": 10963 + }, + { + "epoch": 0.7411112613221577, + "grad_norm": 0.7367828488349915, + "learning_rate": 5.018252159430819e-06, + "loss": 0.1315155029296875, + "step": 10964 + }, + { + "epoch": 0.7411788562930918, + "grad_norm": 0.6345504522323608, + "learning_rate": 5.01578949121158e-06, + "loss": 0.148040771484375, + "step": 10965 + }, + { + "epoch": 0.741246451264026, + "grad_norm": 1.1175718307495117, + "learning_rate": 5.013327306090543e-06, + "loss": 0.182220458984375, + "step": 10966 + }, + { + "epoch": 0.7413140462349601, + "grad_norm": 0.5206151604652405, + "learning_rate": 5.010865604186829e-06, + "loss": 0.10671234130859375, + "step": 10967 + }, + { + "epoch": 0.7413816412058942, + "grad_norm": 0.311855286359787, + "learning_rate": 5.008404385619563e-06, + "loss": 0.041271209716796875, + "step": 10968 + }, + { + "epoch": 0.7414492361768285, + "grad_norm": 0.2359493374824524, + "learning_rate": 5.005943650507831e-06, + "loss": 0.0407257080078125, + "step": 10969 + }, + { + "epoch": 0.7415168311477626, + "grad_norm": 0.5239661335945129, + "learning_rate": 5.003483398970698e-06, + "loss": 0.1004180908203125, + "step": 10970 + }, + { + "epoch": 0.7415844261186968, + "grad_norm": 0.5080515146255493, + "learning_rate": 5.001023631127204e-06, + "loss": 0.1005706787109375, + "step": 10971 + }, + { + "epoch": 0.7416520210896309, + "grad_norm": 0.9997541308403015, + "learning_rate": 4.998564347096372e-06, + "loss": 0.2297821044921875, + "step": 10972 + }, + { + "epoch": 0.7417196160605651, + "grad_norm": 0.4647265374660492, + "learning_rate": 4.996105546997188e-06, + "loss": 0.06475067138671875, + "step": 10973 + }, + { + "epoch": 0.7417872110314992, + "grad_norm": 1.1959903240203857, + "learning_rate": 4.9936472309486425e-06, + "loss": 0.1896514892578125, + "step": 10974 + }, + { + "epoch": 0.7418548060024334, + "grad_norm": 0.356918603181839, + "learning_rate": 4.991189399069666e-06, + "loss": 0.06489944458007812, + "step": 10975 + }, + { + "epoch": 0.7419224009733676, + "grad_norm": 1.0597678422927856, + "learning_rate": 4.988732051479187e-06, + "loss": 0.20318603515625, + "step": 10976 + }, + { + "epoch": 0.7419899959443017, + "grad_norm": 0.8464162945747375, + "learning_rate": 4.986275188296121e-06, + "loss": 0.170989990234375, + "step": 10977 + }, + { + "epoch": 0.7420575909152359, + "grad_norm": 0.8315334916114807, + "learning_rate": 4.983818809639326e-06, + "loss": 0.1189422607421875, + "step": 10978 + }, + { + "epoch": 0.74212518588617, + "grad_norm": 0.5735172033309937, + "learning_rate": 4.981362915627676e-06, + "loss": 0.06328964233398438, + "step": 10979 + }, + { + "epoch": 0.7421927808571043, + "grad_norm": 0.6391464471817017, + "learning_rate": 4.978907506379985e-06, + "loss": 0.11788558959960938, + "step": 10980 + }, + { + "epoch": 0.7422603758280384, + "grad_norm": 0.5476253628730774, + "learning_rate": 4.976452582015073e-06, + "loss": 0.08624267578125, + "step": 10981 + }, + { + "epoch": 0.7423279707989725, + "grad_norm": 0.2556057870388031, + "learning_rate": 4.97399814265172e-06, + "loss": 0.0379638671875, + "step": 10982 + }, + { + "epoch": 0.7423955657699067, + "grad_norm": 0.34689420461654663, + "learning_rate": 4.9715441884086895e-06, + "loss": 0.05864715576171875, + "step": 10983 + }, + { + "epoch": 0.7424631607408408, + "grad_norm": 0.5950024127960205, + "learning_rate": 4.969090719404718e-06, + "loss": 0.1280364990234375, + "step": 10984 + }, + { + "epoch": 0.7425307557117751, + "grad_norm": 0.5675973892211914, + "learning_rate": 4.966637735758518e-06, + "loss": 0.0997314453125, + "step": 10985 + }, + { + "epoch": 0.7425983506827092, + "grad_norm": 0.28722700476646423, + "learning_rate": 4.964185237588781e-06, + "loss": 0.0573577880859375, + "step": 10986 + }, + { + "epoch": 0.7426659456536434, + "grad_norm": 0.8129509091377258, + "learning_rate": 4.961733225014174e-06, + "loss": 0.166168212890625, + "step": 10987 + }, + { + "epoch": 0.7427335406245775, + "grad_norm": 0.6197152733802795, + "learning_rate": 4.9592816981533405e-06, + "loss": 0.1219329833984375, + "step": 10988 + }, + { + "epoch": 0.7428011355955116, + "grad_norm": 0.3386520445346832, + "learning_rate": 4.956830657124898e-06, + "loss": 0.0522611141204834, + "step": 10989 + }, + { + "epoch": 0.7428687305664459, + "grad_norm": 0.29228854179382324, + "learning_rate": 4.9543801020474546e-06, + "loss": 0.04326629638671875, + "step": 10990 + }, + { + "epoch": 0.74293632553738, + "grad_norm": 0.22217562794685364, + "learning_rate": 4.951930033039566e-06, + "loss": 0.04528045654296875, + "step": 10991 + }, + { + "epoch": 0.7430039205083142, + "grad_norm": 0.6633599996566772, + "learning_rate": 4.949480450219795e-06, + "loss": 0.08380889892578125, + "step": 10992 + }, + { + "epoch": 0.7430715154792483, + "grad_norm": 0.8753322958946228, + "learning_rate": 4.947031353706663e-06, + "loss": 0.1495494842529297, + "step": 10993 + }, + { + "epoch": 0.7431391104501825, + "grad_norm": 0.31021547317504883, + "learning_rate": 4.9445827436186735e-06, + "loss": 0.04801177978515625, + "step": 10994 + }, + { + "epoch": 0.7432067054211167, + "grad_norm": 1.0210375785827637, + "learning_rate": 4.942134620074304e-06, + "loss": 0.12248992919921875, + "step": 10995 + }, + { + "epoch": 0.7432743003920508, + "grad_norm": 0.49234285950660706, + "learning_rate": 4.939686983192011e-06, + "loss": 0.052921295166015625, + "step": 10996 + }, + { + "epoch": 0.743341895362985, + "grad_norm": 1.3984414339065552, + "learning_rate": 4.937239833090227e-06, + "loss": 0.16259765625, + "step": 10997 + }, + { + "epoch": 0.7434094903339191, + "grad_norm": 0.6072864532470703, + "learning_rate": 4.9347931698873575e-06, + "loss": 0.112213134765625, + "step": 10998 + }, + { + "epoch": 0.7434770853048533, + "grad_norm": 0.7423064112663269, + "learning_rate": 4.932346993701792e-06, + "loss": 0.1189727783203125, + "step": 10999 + }, + { + "epoch": 0.7435446802757875, + "grad_norm": 0.3661513924598694, + "learning_rate": 4.929901304651887e-06, + "loss": 0.0486602783203125, + "step": 11000 + }, + { + "epoch": 0.7436122752467217, + "grad_norm": 0.7732366323471069, + "learning_rate": 4.927456102855984e-06, + "loss": 0.1060943603515625, + "step": 11001 + }, + { + "epoch": 0.7436798702176558, + "grad_norm": 1.0320533514022827, + "learning_rate": 4.925011388432388e-06, + "loss": 0.188446044921875, + "step": 11002 + }, + { + "epoch": 0.7437474651885899, + "grad_norm": 0.3246951401233673, + "learning_rate": 4.922567161499408e-06, + "loss": 0.058895111083984375, + "step": 11003 + }, + { + "epoch": 0.7438150601595241, + "grad_norm": 0.609565019607544, + "learning_rate": 4.920123422175289e-06, + "loss": 0.10140228271484375, + "step": 11004 + }, + { + "epoch": 0.7438826551304583, + "grad_norm": 0.9782124757766724, + "learning_rate": 4.917680170578292e-06, + "loss": 0.15789794921875, + "step": 11005 + }, + { + "epoch": 0.7439502501013925, + "grad_norm": 0.755864679813385, + "learning_rate": 4.915237406826626e-06, + "loss": 0.12267303466796875, + "step": 11006 + }, + { + "epoch": 0.7440178450723266, + "grad_norm": 1.0973916053771973, + "learning_rate": 4.9127951310384934e-06, + "loss": 0.13576889038085938, + "step": 11007 + }, + { + "epoch": 0.7440854400432608, + "grad_norm": 1.0399583578109741, + "learning_rate": 4.910353343332063e-06, + "loss": 0.1682281494140625, + "step": 11008 + }, + { + "epoch": 0.7441530350141949, + "grad_norm": 1.3221924304962158, + "learning_rate": 4.907912043825485e-06, + "loss": 0.20508575439453125, + "step": 11009 + }, + { + "epoch": 0.7442206299851291, + "grad_norm": 1.178950309753418, + "learning_rate": 4.905471232636885e-06, + "loss": 0.215911865234375, + "step": 11010 + }, + { + "epoch": 0.7442882249560633, + "grad_norm": 1.0448479652404785, + "learning_rate": 4.903030909884363e-06, + "loss": 0.14156341552734375, + "step": 11011 + }, + { + "epoch": 0.7443558199269974, + "grad_norm": 0.4094710946083069, + "learning_rate": 4.900591075685998e-06, + "loss": 0.07112884521484375, + "step": 11012 + }, + { + "epoch": 0.7444234148979316, + "grad_norm": 2.5163416862487793, + "learning_rate": 4.898151730159845e-06, + "loss": 0.2156982421875, + "step": 11013 + }, + { + "epoch": 0.7444910098688657, + "grad_norm": 0.2242310494184494, + "learning_rate": 4.895712873423934e-06, + "loss": 0.043460845947265625, + "step": 11014 + }, + { + "epoch": 0.7445586048398, + "grad_norm": 0.41420063376426697, + "learning_rate": 4.893274505596267e-06, + "loss": 0.0984039306640625, + "step": 11015 + }, + { + "epoch": 0.7446261998107341, + "grad_norm": 0.7671650648117065, + "learning_rate": 4.89083662679484e-06, + "loss": 0.097747802734375, + "step": 11016 + }, + { + "epoch": 0.7446937947816682, + "grad_norm": 0.7386581301689148, + "learning_rate": 4.888399237137597e-06, + "loss": 0.1368865966796875, + "step": 11017 + }, + { + "epoch": 0.7447613897526024, + "grad_norm": 0.6081938743591309, + "learning_rate": 4.885962336742489e-06, + "loss": 0.10515594482421875, + "step": 11018 + }, + { + "epoch": 0.7448289847235365, + "grad_norm": 0.5340843200683594, + "learning_rate": 4.88352592572742e-06, + "loss": 0.09397506713867188, + "step": 11019 + }, + { + "epoch": 0.7448965796944708, + "grad_norm": 2.7354815006256104, + "learning_rate": 4.8810900042102805e-06, + "loss": 0.1558380126953125, + "step": 11020 + }, + { + "epoch": 0.7449641746654049, + "grad_norm": 0.6524830460548401, + "learning_rate": 4.878654572308936e-06, + "loss": 0.137420654296875, + "step": 11021 + }, + { + "epoch": 0.7450317696363391, + "grad_norm": 0.8849839568138123, + "learning_rate": 4.876219630141227e-06, + "loss": 0.133392333984375, + "step": 11022 + }, + { + "epoch": 0.7450993646072732, + "grad_norm": 0.4497668147087097, + "learning_rate": 4.873785177824971e-06, + "loss": 0.0765533447265625, + "step": 11023 + }, + { + "epoch": 0.7451669595782073, + "grad_norm": 0.5673354268074036, + "learning_rate": 4.871351215477962e-06, + "loss": 0.103668212890625, + "step": 11024 + }, + { + "epoch": 0.7452345545491416, + "grad_norm": 0.6653240323066711, + "learning_rate": 4.868917743217971e-06, + "loss": 0.1350555419921875, + "step": 11025 + }, + { + "epoch": 0.7453021495200757, + "grad_norm": 0.5310679078102112, + "learning_rate": 4.866484761162745e-06, + "loss": 0.1059722900390625, + "step": 11026 + }, + { + "epoch": 0.7453697444910099, + "grad_norm": 0.8023433685302734, + "learning_rate": 4.864052269430005e-06, + "loss": 0.18920135498046875, + "step": 11027 + }, + { + "epoch": 0.745437339461944, + "grad_norm": 0.8073356747627258, + "learning_rate": 4.861620268137447e-06, + "loss": 0.12821578979492188, + "step": 11028 + }, + { + "epoch": 0.7455049344328782, + "grad_norm": 0.7066130042076111, + "learning_rate": 4.85918875740276e-06, + "loss": 0.13153839111328125, + "step": 11029 + }, + { + "epoch": 0.7455725294038124, + "grad_norm": 0.3356715440750122, + "learning_rate": 4.856757737343574e-06, + "loss": 0.04219818115234375, + "step": 11030 + }, + { + "epoch": 0.7456401243747465, + "grad_norm": 0.7914642095565796, + "learning_rate": 4.854327208077536e-06, + "loss": 0.13051605224609375, + "step": 11031 + }, + { + "epoch": 0.7457077193456807, + "grad_norm": 0.7046528458595276, + "learning_rate": 4.8518971697222475e-06, + "loss": 0.11333465576171875, + "step": 11032 + }, + { + "epoch": 0.7457753143166148, + "grad_norm": 0.5916836261749268, + "learning_rate": 4.849467622395273e-06, + "loss": 0.1266632080078125, + "step": 11033 + }, + { + "epoch": 0.745842909287549, + "grad_norm": 0.4795233905315399, + "learning_rate": 4.8470385662141865e-06, + "loss": 0.054233551025390625, + "step": 11034 + }, + { + "epoch": 0.7459105042584832, + "grad_norm": 0.35367351770401, + "learning_rate": 4.844610001296514e-06, + "loss": 0.036510467529296875, + "step": 11035 + }, + { + "epoch": 0.7459780992294174, + "grad_norm": 0.9213475584983826, + "learning_rate": 4.842181927759767e-06, + "loss": 0.08929443359375, + "step": 11036 + }, + { + "epoch": 0.7460456942003515, + "grad_norm": 1.021621584892273, + "learning_rate": 4.839754345721428e-06, + "loss": 0.1057891845703125, + "step": 11037 + }, + { + "epoch": 0.7461132891712856, + "grad_norm": 0.6573030948638916, + "learning_rate": 4.83732725529896e-06, + "loss": 0.11279296875, + "step": 11038 + }, + { + "epoch": 0.7461808841422198, + "grad_norm": 0.6837291717529297, + "learning_rate": 4.834900656609795e-06, + "loss": 0.1407318115234375, + "step": 11039 + }, + { + "epoch": 0.746248479113154, + "grad_norm": 0.6345953345298767, + "learning_rate": 4.832474549771363e-06, + "loss": 0.1396484375, + "step": 11040 + }, + { + "epoch": 0.7463160740840882, + "grad_norm": 0.5388017296791077, + "learning_rate": 4.830048934901032e-06, + "loss": 0.06198883056640625, + "step": 11041 + }, + { + "epoch": 0.7463836690550223, + "grad_norm": 0.8679295778274536, + "learning_rate": 4.827623812116191e-06, + "loss": 0.13062286376953125, + "step": 11042 + }, + { + "epoch": 0.7464512640259565, + "grad_norm": 0.23772229254245758, + "learning_rate": 4.825199181534161e-06, + "loss": 0.031925201416015625, + "step": 11043 + }, + { + "epoch": 0.7465188589968906, + "grad_norm": 0.8416712880134583, + "learning_rate": 4.822775043272276e-06, + "loss": 0.1778411865234375, + "step": 11044 + }, + { + "epoch": 0.7465864539678247, + "grad_norm": 0.8360210061073303, + "learning_rate": 4.82035139744783e-06, + "loss": 0.1535186767578125, + "step": 11045 + }, + { + "epoch": 0.746654048938759, + "grad_norm": 0.8823641538619995, + "learning_rate": 4.8179282441780815e-06, + "loss": 0.1168975830078125, + "step": 11046 + }, + { + "epoch": 0.7467216439096931, + "grad_norm": 0.18091250956058502, + "learning_rate": 4.815505583580291e-06, + "loss": 0.023054122924804688, + "step": 11047 + }, + { + "epoch": 0.7467892388806273, + "grad_norm": 0.8282818794250488, + "learning_rate": 4.813083415771676e-06, + "loss": 0.1081085205078125, + "step": 11048 + }, + { + "epoch": 0.7468568338515614, + "grad_norm": 0.4781520664691925, + "learning_rate": 4.810661740869438e-06, + "loss": 0.07837677001953125, + "step": 11049 + }, + { + "epoch": 0.7469244288224957, + "grad_norm": 0.7984262108802795, + "learning_rate": 4.808240558990751e-06, + "loss": 0.12679290771484375, + "step": 11050 + }, + { + "epoch": 0.7469920237934298, + "grad_norm": 0.6131070852279663, + "learning_rate": 4.805819870252769e-06, + "loss": 0.10340118408203125, + "step": 11051 + }, + { + "epoch": 0.7470596187643639, + "grad_norm": 0.7476539611816406, + "learning_rate": 4.803399674772612e-06, + "loss": 0.1664581298828125, + "step": 11052 + }, + { + "epoch": 0.7471272137352981, + "grad_norm": 0.6460263133049011, + "learning_rate": 4.800979972667402e-06, + "loss": 0.0931549072265625, + "step": 11053 + }, + { + "epoch": 0.7471948087062322, + "grad_norm": 0.7347677946090698, + "learning_rate": 4.798560764054196e-06, + "loss": 0.18187713623046875, + "step": 11054 + }, + { + "epoch": 0.7472624036771665, + "grad_norm": 0.7571201920509338, + "learning_rate": 4.796142049050069e-06, + "loss": 0.180206298828125, + "step": 11055 + }, + { + "epoch": 0.7473299986481006, + "grad_norm": 1.0378172397613525, + "learning_rate": 4.7937238277720516e-06, + "loss": 0.217803955078125, + "step": 11056 + }, + { + "epoch": 0.7473975936190348, + "grad_norm": 0.5273662209510803, + "learning_rate": 4.791306100337138e-06, + "loss": 0.0835108757019043, + "step": 11057 + }, + { + "epoch": 0.7474651885899689, + "grad_norm": 0.35981181263923645, + "learning_rate": 4.788888866862331e-06, + "loss": 0.049602508544921875, + "step": 11058 + }, + { + "epoch": 0.747532783560903, + "grad_norm": 0.8393887281417847, + "learning_rate": 4.786472127464575e-06, + "loss": 0.1157379150390625, + "step": 11059 + }, + { + "epoch": 0.7476003785318373, + "grad_norm": 0.9057506918907166, + "learning_rate": 4.784055882260818e-06, + "loss": 0.130859375, + "step": 11060 + }, + { + "epoch": 0.7476679735027714, + "grad_norm": 1.2222014665603638, + "learning_rate": 4.781640131367969e-06, + "loss": 0.1859130859375, + "step": 11061 + }, + { + "epoch": 0.7477355684737056, + "grad_norm": 1.6767933368682861, + "learning_rate": 4.779224874902919e-06, + "loss": 0.17572021484375, + "step": 11062 + }, + { + "epoch": 0.7478031634446397, + "grad_norm": 0.6101590991020203, + "learning_rate": 4.776810112982532e-06, + "loss": 0.09345245361328125, + "step": 11063 + }, + { + "epoch": 0.7478707584155739, + "grad_norm": 0.41289448738098145, + "learning_rate": 4.774395845723649e-06, + "loss": 0.0567779541015625, + "step": 11064 + }, + { + "epoch": 0.747938353386508, + "grad_norm": 0.3155573606491089, + "learning_rate": 4.771982073243082e-06, + "loss": 0.03842926025390625, + "step": 11065 + }, + { + "epoch": 0.7480059483574422, + "grad_norm": 0.3485616147518158, + "learning_rate": 4.76956879565764e-06, + "loss": 0.0474090576171875, + "step": 11066 + }, + { + "epoch": 0.7480735433283764, + "grad_norm": 0.8964559435844421, + "learning_rate": 4.767156013084076e-06, + "loss": 0.163848876953125, + "step": 11067 + }, + { + "epoch": 0.7481411382993105, + "grad_norm": 0.4929598867893219, + "learning_rate": 4.764743725639138e-06, + "loss": 0.096405029296875, + "step": 11068 + }, + { + "epoch": 0.7482087332702447, + "grad_norm": 0.36258465051651, + "learning_rate": 4.76233193343956e-06, + "loss": 0.077056884765625, + "step": 11069 + }, + { + "epoch": 0.7482763282411788, + "grad_norm": 0.30911046266555786, + "learning_rate": 4.75992063660202e-06, + "loss": 0.05277252197265625, + "step": 11070 + }, + { + "epoch": 0.7483439232121131, + "grad_norm": 1.4915484189987183, + "learning_rate": 4.757509835243214e-06, + "loss": 0.2408447265625, + "step": 11071 + }, + { + "epoch": 0.7484115181830472, + "grad_norm": 0.6337834596633911, + "learning_rate": 4.755099529479767e-06, + "loss": 0.126861572265625, + "step": 11072 + }, + { + "epoch": 0.7484791131539813, + "grad_norm": 0.8586395382881165, + "learning_rate": 4.752689719428323e-06, + "loss": 0.13442230224609375, + "step": 11073 + }, + { + "epoch": 0.7485467081249155, + "grad_norm": 1.5162543058395386, + "learning_rate": 4.750280405205478e-06, + "loss": 0.258148193359375, + "step": 11074 + }, + { + "epoch": 0.7486143030958496, + "grad_norm": 0.3564082384109497, + "learning_rate": 4.747871586927809e-06, + "loss": 0.055660247802734375, + "step": 11075 + }, + { + "epoch": 0.7486818980667839, + "grad_norm": 0.9789965748786926, + "learning_rate": 4.7454632647118705e-06, + "loss": 0.179046630859375, + "step": 11076 + }, + { + "epoch": 0.748749493037718, + "grad_norm": 0.28976812958717346, + "learning_rate": 4.743055438674192e-06, + "loss": 0.052974700927734375, + "step": 11077 + }, + { + "epoch": 0.7488170880086522, + "grad_norm": 0.5302175283432007, + "learning_rate": 4.740648108931273e-06, + "loss": 0.0962677001953125, + "step": 11078 + }, + { + "epoch": 0.7488846829795863, + "grad_norm": 0.2766847312450409, + "learning_rate": 4.738241275599611e-06, + "loss": 0.03552818298339844, + "step": 11079 + }, + { + "epoch": 0.7489522779505204, + "grad_norm": 0.6630458831787109, + "learning_rate": 4.735834938795647e-06, + "loss": 0.1300048828125, + "step": 11080 + }, + { + "epoch": 0.7490198729214547, + "grad_norm": 2.174675941467285, + "learning_rate": 4.7334290986358166e-06, + "loss": 0.2288818359375, + "step": 11081 + }, + { + "epoch": 0.7490874678923888, + "grad_norm": 0.8647695183753967, + "learning_rate": 4.731023755236543e-06, + "loss": 0.1517486572265625, + "step": 11082 + }, + { + "epoch": 0.749155062863323, + "grad_norm": 1.5568821430206299, + "learning_rate": 4.728618908714194e-06, + "loss": 0.222625732421875, + "step": 11083 + }, + { + "epoch": 0.7492226578342571, + "grad_norm": 0.8083123564720154, + "learning_rate": 4.726214559185143e-06, + "loss": 0.1179656982421875, + "step": 11084 + }, + { + "epoch": 0.7492902528051913, + "grad_norm": 1.1030529737472534, + "learning_rate": 4.723810706765724e-06, + "loss": 0.168365478515625, + "step": 11085 + }, + { + "epoch": 0.7493578477761255, + "grad_norm": 0.4172740876674652, + "learning_rate": 4.721407351572248e-06, + "loss": 0.06512451171875, + "step": 11086 + }, + { + "epoch": 0.7494254427470596, + "grad_norm": 0.24360893666744232, + "learning_rate": 4.719004493721009e-06, + "loss": 0.030849456787109375, + "step": 11087 + }, + { + "epoch": 0.7494930377179938, + "grad_norm": 0.7639201879501343, + "learning_rate": 4.716602133328268e-06, + "loss": 0.138458251953125, + "step": 11088 + }, + { + "epoch": 0.7495606326889279, + "grad_norm": 0.23441724479198456, + "learning_rate": 4.714200270510268e-06, + "loss": 0.04039764404296875, + "step": 11089 + }, + { + "epoch": 0.7496282276598621, + "grad_norm": 0.18680697679519653, + "learning_rate": 4.711798905383226e-06, + "loss": 0.02809906005859375, + "step": 11090 + }, + { + "epoch": 0.7496958226307963, + "grad_norm": 0.8915424346923828, + "learning_rate": 4.7093980380633335e-06, + "loss": 0.1725311279296875, + "step": 11091 + }, + { + "epoch": 0.7497634176017305, + "grad_norm": 0.5181515216827393, + "learning_rate": 4.706997668666762e-06, + "loss": 0.0912017822265625, + "step": 11092 + }, + { + "epoch": 0.7498310125726646, + "grad_norm": 0.2774382531642914, + "learning_rate": 4.704597797309652e-06, + "loss": 0.04077911376953125, + "step": 11093 + }, + { + "epoch": 0.7498986075435987, + "grad_norm": 0.4117909073829651, + "learning_rate": 4.7021984241081245e-06, + "loss": 0.09295654296875, + "step": 11094 + }, + { + "epoch": 0.7499662025145329, + "grad_norm": 0.48373833298683167, + "learning_rate": 4.699799549178287e-06, + "loss": 0.08978271484375, + "step": 11095 + }, + { + "epoch": 0.7500337974854671, + "grad_norm": 0.7430428862571716, + "learning_rate": 4.697401172636195e-06, + "loss": 0.13134765625, + "step": 11096 + }, + { + "epoch": 0.7501013924564013, + "grad_norm": 0.38847702741622925, + "learning_rate": 4.695003294597911e-06, + "loss": 0.07841110229492188, + "step": 11097 + }, + { + "epoch": 0.7501689874273354, + "grad_norm": 0.2021949589252472, + "learning_rate": 4.692605915179452e-06, + "loss": 0.025481224060058594, + "step": 11098 + }, + { + "epoch": 0.7502365823982695, + "grad_norm": 0.6563234925270081, + "learning_rate": 4.69020903449682e-06, + "loss": 0.08673858642578125, + "step": 11099 + }, + { + "epoch": 0.7503041773692037, + "grad_norm": 1.2821840047836304, + "learning_rate": 4.687812652665991e-06, + "loss": 0.203460693359375, + "step": 11100 + }, + { + "epoch": 0.7503717723401379, + "grad_norm": 0.49734172224998474, + "learning_rate": 4.685416769802916e-06, + "loss": 0.0684051513671875, + "step": 11101 + }, + { + "epoch": 0.7504393673110721, + "grad_norm": 1.0278329849243164, + "learning_rate": 4.683021386023524e-06, + "loss": 0.1354522705078125, + "step": 11102 + }, + { + "epoch": 0.7505069622820062, + "grad_norm": 1.2221057415008545, + "learning_rate": 4.680626501443719e-06, + "loss": 0.182647705078125, + "step": 11103 + }, + { + "epoch": 0.7505745572529404, + "grad_norm": 1.395385503768921, + "learning_rate": 4.678232116179378e-06, + "loss": 0.18692779541015625, + "step": 11104 + }, + { + "epoch": 0.7506421522238745, + "grad_norm": 0.9918705821037292, + "learning_rate": 4.675838230346359e-06, + "loss": 0.214080810546875, + "step": 11105 + }, + { + "epoch": 0.7507097471948087, + "grad_norm": 1.2499306201934814, + "learning_rate": 4.673444844060491e-06, + "loss": 0.14537811279296875, + "step": 11106 + }, + { + "epoch": 0.7507773421657429, + "grad_norm": 0.8020946979522705, + "learning_rate": 4.671051957437576e-06, + "loss": 0.13812255859375, + "step": 11107 + }, + { + "epoch": 0.750844937136677, + "grad_norm": 0.49212491512298584, + "learning_rate": 4.668659570593413e-06, + "loss": 0.07574462890625, + "step": 11108 + }, + { + "epoch": 0.7509125321076112, + "grad_norm": 0.4807109236717224, + "learning_rate": 4.66626768364374e-06, + "loss": 0.0856170654296875, + "step": 11109 + }, + { + "epoch": 0.7509801270785453, + "grad_norm": 0.7412552237510681, + "learning_rate": 4.663876296704306e-06, + "loss": 0.11296844482421875, + "step": 11110 + }, + { + "epoch": 0.7510477220494796, + "grad_norm": 0.2060859054327011, + "learning_rate": 4.661485409890815e-06, + "loss": 0.02553558349609375, + "step": 11111 + }, + { + "epoch": 0.7511153170204137, + "grad_norm": 0.28455811738967896, + "learning_rate": 4.6590950233189564e-06, + "loss": 0.042949676513671875, + "step": 11112 + }, + { + "epoch": 0.7511829119913478, + "grad_norm": 0.21646110713481903, + "learning_rate": 4.65670513710439e-06, + "loss": 0.03620147705078125, + "step": 11113 + }, + { + "epoch": 0.751250506962282, + "grad_norm": 0.2321665734052658, + "learning_rate": 4.654315751362752e-06, + "loss": 0.04288673400878906, + "step": 11114 + }, + { + "epoch": 0.7513181019332161, + "grad_norm": 0.7900711894035339, + "learning_rate": 4.651926866209657e-06, + "loss": 0.14026641845703125, + "step": 11115 + }, + { + "epoch": 0.7513856969041504, + "grad_norm": 0.6227104067802429, + "learning_rate": 4.649538481760696e-06, + "loss": 0.08586883544921875, + "step": 11116 + }, + { + "epoch": 0.7514532918750845, + "grad_norm": 0.475069522857666, + "learning_rate": 4.64715059813143e-06, + "loss": 0.06800079345703125, + "step": 11117 + }, + { + "epoch": 0.7515208868460187, + "grad_norm": 1.0499026775360107, + "learning_rate": 4.644763215437403e-06, + "loss": 0.1288299560546875, + "step": 11118 + }, + { + "epoch": 0.7515884818169528, + "grad_norm": 0.965291440486908, + "learning_rate": 4.642376333794129e-06, + "loss": 0.1667327880859375, + "step": 11119 + }, + { + "epoch": 0.7516560767878869, + "grad_norm": 0.9412931799888611, + "learning_rate": 4.639989953317098e-06, + "loss": 0.16553497314453125, + "step": 11120 + }, + { + "epoch": 0.7517236717588212, + "grad_norm": 0.7284510135650635, + "learning_rate": 4.637604074121791e-06, + "loss": 0.14249420166015625, + "step": 11121 + }, + { + "epoch": 0.7517912667297553, + "grad_norm": 0.5243281722068787, + "learning_rate": 4.635218696323631e-06, + "loss": 0.07079315185546875, + "step": 11122 + }, + { + "epoch": 0.7518588617006895, + "grad_norm": 0.9353466033935547, + "learning_rate": 4.632833820038053e-06, + "loss": 0.12603759765625, + "step": 11123 + }, + { + "epoch": 0.7519264566716236, + "grad_norm": 0.9355008006095886, + "learning_rate": 4.630449445380449e-06, + "loss": 0.1576385498046875, + "step": 11124 + }, + { + "epoch": 0.7519940516425578, + "grad_norm": 1.5317444801330566, + "learning_rate": 4.628065572466188e-06, + "loss": 0.14080810546875, + "step": 11125 + }, + { + "epoch": 0.752061646613492, + "grad_norm": 0.5534554719924927, + "learning_rate": 4.625682201410616e-06, + "loss": 0.08226776123046875, + "step": 11126 + }, + { + "epoch": 0.7521292415844261, + "grad_norm": 1.0882458686828613, + "learning_rate": 4.623299332329057e-06, + "loss": 0.23797607421875, + "step": 11127 + }, + { + "epoch": 0.7521968365553603, + "grad_norm": 0.9990894198417664, + "learning_rate": 4.620916965336809e-06, + "loss": 0.14012908935546875, + "step": 11128 + }, + { + "epoch": 0.7522644315262944, + "grad_norm": 0.5503876805305481, + "learning_rate": 4.618535100549143e-06, + "loss": 0.10406494140625, + "step": 11129 + }, + { + "epoch": 0.7523320264972286, + "grad_norm": 0.9171949625015259, + "learning_rate": 4.616153738081311e-06, + "loss": 0.17437744140625, + "step": 11130 + }, + { + "epoch": 0.7523996214681627, + "grad_norm": 0.5313889384269714, + "learning_rate": 4.613772878048536e-06, + "loss": 0.099884033203125, + "step": 11131 + }, + { + "epoch": 0.752467216439097, + "grad_norm": 0.43852582573890686, + "learning_rate": 4.611392520566028e-06, + "loss": 0.0994110107421875, + "step": 11132 + }, + { + "epoch": 0.7525348114100311, + "grad_norm": 0.9031246900558472, + "learning_rate": 4.609012665748945e-06, + "loss": 0.101287841796875, + "step": 11133 + }, + { + "epoch": 0.7526024063809652, + "grad_norm": 0.30682581663131714, + "learning_rate": 4.60663331371246e-06, + "loss": 0.04933929443359375, + "step": 11134 + }, + { + "epoch": 0.7526700013518994, + "grad_norm": 0.17272035777568817, + "learning_rate": 4.604254464571681e-06, + "loss": 0.023725509643554688, + "step": 11135 + }, + { + "epoch": 0.7527375963228335, + "grad_norm": 0.6055204272270203, + "learning_rate": 4.601876118441728e-06, + "loss": 0.0934600830078125, + "step": 11136 + }, + { + "epoch": 0.7528051912937678, + "grad_norm": 0.3882589340209961, + "learning_rate": 4.599498275437677e-06, + "loss": 0.07405853271484375, + "step": 11137 + }, + { + "epoch": 0.7528727862647019, + "grad_norm": 0.8049427270889282, + "learning_rate": 4.597120935674572e-06, + "loss": 0.15915679931640625, + "step": 11138 + }, + { + "epoch": 0.7529403812356361, + "grad_norm": 0.22052350640296936, + "learning_rate": 4.5947440992674526e-06, + "loss": 0.04135894775390625, + "step": 11139 + }, + { + "epoch": 0.7530079762065702, + "grad_norm": 0.285110205411911, + "learning_rate": 4.592367766331326e-06, + "loss": 0.046085357666015625, + "step": 11140 + }, + { + "epoch": 0.7530755711775043, + "grad_norm": 0.2238534837961197, + "learning_rate": 4.58999193698117e-06, + "loss": 0.035274505615234375, + "step": 11141 + }, + { + "epoch": 0.7531431661484386, + "grad_norm": 0.4445739686489105, + "learning_rate": 4.587616611331943e-06, + "loss": 0.0793304443359375, + "step": 11142 + }, + { + "epoch": 0.7532107611193727, + "grad_norm": 0.8309602737426758, + "learning_rate": 4.585241789498578e-06, + "loss": 0.146392822265625, + "step": 11143 + }, + { + "epoch": 0.7532783560903069, + "grad_norm": 0.45413145422935486, + "learning_rate": 4.582867471595981e-06, + "loss": 0.07139778137207031, + "step": 11144 + }, + { + "epoch": 0.753345951061241, + "grad_norm": 1.3534749746322632, + "learning_rate": 4.580493657739047e-06, + "loss": 0.13532257080078125, + "step": 11145 + }, + { + "epoch": 0.7534135460321753, + "grad_norm": 0.49127981066703796, + "learning_rate": 4.5781203480426184e-06, + "loss": 0.0958099365234375, + "step": 11146 + }, + { + "epoch": 0.7534811410031094, + "grad_norm": 0.2544126808643341, + "learning_rate": 4.575747542621545e-06, + "loss": 0.04705810546875, + "step": 11147 + }, + { + "epoch": 0.7535487359740435, + "grad_norm": 0.19345030188560486, + "learning_rate": 4.573375241590634e-06, + "loss": 0.035076141357421875, + "step": 11148 + }, + { + "epoch": 0.7536163309449777, + "grad_norm": 0.2972996234893799, + "learning_rate": 4.5710034450646714e-06, + "loss": 0.04101753234863281, + "step": 11149 + }, + { + "epoch": 0.7536839259159118, + "grad_norm": 0.3816569149494171, + "learning_rate": 4.568632153158425e-06, + "loss": 0.067169189453125, + "step": 11150 + }, + { + "epoch": 0.753751520886846, + "grad_norm": 0.2513832151889801, + "learning_rate": 4.566261365986616e-06, + "loss": 0.052906036376953125, + "step": 11151 + }, + { + "epoch": 0.7538191158577802, + "grad_norm": 0.5766844153404236, + "learning_rate": 4.5638910836639734e-06, + "loss": 0.1147003173828125, + "step": 11152 + }, + { + "epoch": 0.7538867108287144, + "grad_norm": 0.8865706324577332, + "learning_rate": 4.561521306305183e-06, + "loss": 0.1608734130859375, + "step": 11153 + }, + { + "epoch": 0.7539543057996485, + "grad_norm": 1.2803102731704712, + "learning_rate": 4.559152034024907e-06, + "loss": 0.1378936767578125, + "step": 11154 + }, + { + "epoch": 0.7540219007705826, + "grad_norm": 0.30707019567489624, + "learning_rate": 4.556783266937786e-06, + "loss": 0.05466461181640625, + "step": 11155 + }, + { + "epoch": 0.7540894957415168, + "grad_norm": 0.6620361804962158, + "learning_rate": 4.554415005158437e-06, + "loss": 0.129486083984375, + "step": 11156 + }, + { + "epoch": 0.754157090712451, + "grad_norm": 0.3155176341533661, + "learning_rate": 4.5520472488014466e-06, + "loss": 0.046695709228515625, + "step": 11157 + }, + { + "epoch": 0.7542246856833852, + "grad_norm": 1.4582836627960205, + "learning_rate": 4.5496799979813936e-06, + "loss": 0.214141845703125, + "step": 11158 + }, + { + "epoch": 0.7542922806543193, + "grad_norm": 0.47676023840904236, + "learning_rate": 4.547313252812803e-06, + "loss": 0.10540771484375, + "step": 11159 + }, + { + "epoch": 0.7543598756252535, + "grad_norm": 0.440900593996048, + "learning_rate": 4.544947013410207e-06, + "loss": 0.074371337890625, + "step": 11160 + }, + { + "epoch": 0.7544274705961876, + "grad_norm": 0.5814980268478394, + "learning_rate": 4.542581279888098e-06, + "loss": 0.104278564453125, + "step": 11161 + }, + { + "epoch": 0.7544950655671218, + "grad_norm": 1.2635791301727295, + "learning_rate": 4.54021605236093e-06, + "loss": 0.2036590576171875, + "step": 11162 + }, + { + "epoch": 0.754562660538056, + "grad_norm": 1.0029810667037964, + "learning_rate": 4.53785133094317e-06, + "loss": 0.13631439208984375, + "step": 11163 + }, + { + "epoch": 0.7546302555089901, + "grad_norm": 0.3950771987438202, + "learning_rate": 4.535487115749214e-06, + "loss": 0.06771087646484375, + "step": 11164 + }, + { + "epoch": 0.7546978504799243, + "grad_norm": 0.386148601770401, + "learning_rate": 4.533123406893476e-06, + "loss": 0.06490516662597656, + "step": 11165 + }, + { + "epoch": 0.7547654454508584, + "grad_norm": 0.39926889538764954, + "learning_rate": 4.53076020449032e-06, + "loss": 0.08123779296875, + "step": 11166 + }, + { + "epoch": 0.7548330404217927, + "grad_norm": 0.9047625660896301, + "learning_rate": 4.528397508654093e-06, + "loss": 0.14398956298828125, + "step": 11167 + }, + { + "epoch": 0.7549006353927268, + "grad_norm": 0.3471490740776062, + "learning_rate": 4.526035319499117e-06, + "loss": 0.06337738037109375, + "step": 11168 + }, + { + "epoch": 0.7549682303636609, + "grad_norm": 0.2543686032295227, + "learning_rate": 4.523673637139688e-06, + "loss": 0.04290580749511719, + "step": 11169 + }, + { + "epoch": 0.7550358253345951, + "grad_norm": 0.39606261253356934, + "learning_rate": 4.521312461690076e-06, + "loss": 0.05511474609375, + "step": 11170 + }, + { + "epoch": 0.7551034203055292, + "grad_norm": 0.3258931040763855, + "learning_rate": 4.518951793264542e-06, + "loss": 0.044342041015625, + "step": 11171 + }, + { + "epoch": 0.7551710152764635, + "grad_norm": 0.39061394333839417, + "learning_rate": 4.516591631977296e-06, + "loss": 0.07274627685546875, + "step": 11172 + }, + { + "epoch": 0.7552386102473976, + "grad_norm": 0.6417104005813599, + "learning_rate": 4.5142319779425395e-06, + "loss": 0.09299468994140625, + "step": 11173 + }, + { + "epoch": 0.7553062052183318, + "grad_norm": 0.5729369521141052, + "learning_rate": 4.511872831274459e-06, + "loss": 0.12762069702148438, + "step": 11174 + }, + { + "epoch": 0.7553738001892659, + "grad_norm": 0.8396062254905701, + "learning_rate": 4.509514192087187e-06, + "loss": 0.17083740234375, + "step": 11175 + }, + { + "epoch": 0.7554413951602, + "grad_norm": 1.1388527154922485, + "learning_rate": 4.507156060494862e-06, + "loss": 0.1263275146484375, + "step": 11176 + }, + { + "epoch": 0.7555089901311343, + "grad_norm": 0.31521815061569214, + "learning_rate": 4.504798436611582e-06, + "loss": 0.0465850830078125, + "step": 11177 + }, + { + "epoch": 0.7555765851020684, + "grad_norm": 0.9150810241699219, + "learning_rate": 4.502441320551422e-06, + "loss": 0.1518707275390625, + "step": 11178 + }, + { + "epoch": 0.7556441800730026, + "grad_norm": 0.7474772334098816, + "learning_rate": 4.500084712428434e-06, + "loss": 0.119110107421875, + "step": 11179 + }, + { + "epoch": 0.7557117750439367, + "grad_norm": 0.8608741760253906, + "learning_rate": 4.497728612356646e-06, + "loss": 0.191986083984375, + "step": 11180 + }, + { + "epoch": 0.7557793700148709, + "grad_norm": 0.5138552784919739, + "learning_rate": 4.495373020450062e-06, + "loss": 0.08260345458984375, + "step": 11181 + }, + { + "epoch": 0.7558469649858051, + "grad_norm": 0.7833166122436523, + "learning_rate": 4.493017936822657e-06, + "loss": 0.130706787109375, + "step": 11182 + }, + { + "epoch": 0.7559145599567392, + "grad_norm": 1.1066724061965942, + "learning_rate": 4.490663361588382e-06, + "loss": 0.1318359375, + "step": 11183 + }, + { + "epoch": 0.7559821549276734, + "grad_norm": 0.6760061979293823, + "learning_rate": 4.488309294861182e-06, + "loss": 0.09331512451171875, + "step": 11184 + }, + { + "epoch": 0.7560497498986075, + "grad_norm": 0.3870461881160736, + "learning_rate": 4.485955736754943e-06, + "loss": 0.0556793212890625, + "step": 11185 + }, + { + "epoch": 0.7561173448695417, + "grad_norm": 0.585544764995575, + "learning_rate": 4.483602687383547e-06, + "loss": 0.1284332275390625, + "step": 11186 + }, + { + "epoch": 0.7561849398404759, + "grad_norm": 0.9021815061569214, + "learning_rate": 4.481250146860863e-06, + "loss": 0.165130615234375, + "step": 11187 + }, + { + "epoch": 0.7562525348114101, + "grad_norm": 0.3518024981021881, + "learning_rate": 4.478898115300704e-06, + "loss": 0.061229705810546875, + "step": 11188 + }, + { + "epoch": 0.7563201297823442, + "grad_norm": 0.3951992988586426, + "learning_rate": 4.476546592816888e-06, + "loss": 0.089599609375, + "step": 11189 + }, + { + "epoch": 0.7563877247532783, + "grad_norm": 0.3150009512901306, + "learning_rate": 4.474195579523192e-06, + "loss": 0.0503692626953125, + "step": 11190 + }, + { + "epoch": 0.7564553197242125, + "grad_norm": 0.4109216034412384, + "learning_rate": 4.471845075533375e-06, + "loss": 0.048961639404296875, + "step": 11191 + }, + { + "epoch": 0.7565229146951467, + "grad_norm": 1.0220890045166016, + "learning_rate": 4.4694950809611665e-06, + "loss": 0.12058639526367188, + "step": 11192 + }, + { + "epoch": 0.7565905096660809, + "grad_norm": 0.581901490688324, + "learning_rate": 4.467145595920273e-06, + "loss": 0.10888671875, + "step": 11193 + }, + { + "epoch": 0.756658104637015, + "grad_norm": 0.7660098075866699, + "learning_rate": 4.464796620524375e-06, + "loss": 0.13458251953125, + "step": 11194 + }, + { + "epoch": 0.7567256996079492, + "grad_norm": 0.6521254777908325, + "learning_rate": 4.462448154887144e-06, + "loss": 0.11186981201171875, + "step": 11195 + }, + { + "epoch": 0.7567932945788833, + "grad_norm": 0.6785961985588074, + "learning_rate": 4.460100199122199e-06, + "loss": 0.1178131103515625, + "step": 11196 + }, + { + "epoch": 0.7568608895498175, + "grad_norm": 0.7019192576408386, + "learning_rate": 4.4577527533431525e-06, + "loss": 0.159423828125, + "step": 11197 + }, + { + "epoch": 0.7569284845207517, + "grad_norm": 0.7975277304649353, + "learning_rate": 4.455405817663591e-06, + "loss": 0.1165008544921875, + "step": 11198 + }, + { + "epoch": 0.7569960794916858, + "grad_norm": 0.31809893250465393, + "learning_rate": 4.453059392197065e-06, + "loss": 0.060329437255859375, + "step": 11199 + }, + { + "epoch": 0.75706367446262, + "grad_norm": 0.5530132055282593, + "learning_rate": 4.450713477057128e-06, + "loss": 0.115997314453125, + "step": 11200 + }, + { + "epoch": 0.7571312694335541, + "grad_norm": 0.3874632716178894, + "learning_rate": 4.448368072357268e-06, + "loss": 0.0706329345703125, + "step": 11201 + }, + { + "epoch": 0.7571988644044884, + "grad_norm": 0.8839913010597229, + "learning_rate": 4.4460231782109835e-06, + "loss": 0.17369842529296875, + "step": 11202 + }, + { + "epoch": 0.7572664593754225, + "grad_norm": 0.5148226618766785, + "learning_rate": 4.443678794731733e-06, + "loss": 0.11017608642578125, + "step": 11203 + }, + { + "epoch": 0.7573340543463566, + "grad_norm": 0.49780869483947754, + "learning_rate": 4.441334922032953e-06, + "loss": 0.10791015625, + "step": 11204 + }, + { + "epoch": 0.7574016493172908, + "grad_norm": 0.7184620499610901, + "learning_rate": 4.438991560228053e-06, + "loss": 0.10218048095703125, + "step": 11205 + }, + { + "epoch": 0.7574692442882249, + "grad_norm": 0.7083460092544556, + "learning_rate": 4.436648709430419e-06, + "loss": 0.1274566650390625, + "step": 11206 + }, + { + "epoch": 0.7575368392591592, + "grad_norm": 0.8927392363548279, + "learning_rate": 4.434306369753413e-06, + "loss": 0.123443603515625, + "step": 11207 + }, + { + "epoch": 0.7576044342300933, + "grad_norm": 0.6218840479850769, + "learning_rate": 4.4319645413103725e-06, + "loss": 0.1085357666015625, + "step": 11208 + }, + { + "epoch": 0.7576720292010275, + "grad_norm": 0.5041053295135498, + "learning_rate": 4.429623224214609e-06, + "loss": 0.08289337158203125, + "step": 11209 + }, + { + "epoch": 0.7577396241719616, + "grad_norm": 0.6891477108001709, + "learning_rate": 4.42728241857941e-06, + "loss": 0.08663558959960938, + "step": 11210 + }, + { + "epoch": 0.7578072191428957, + "grad_norm": 0.30775347352027893, + "learning_rate": 4.424942124518039e-06, + "loss": 0.058685302734375, + "step": 11211 + }, + { + "epoch": 0.75787481411383, + "grad_norm": 0.5744447112083435, + "learning_rate": 4.422602342143729e-06, + "loss": 0.09446048736572266, + "step": 11212 + }, + { + "epoch": 0.7579424090847641, + "grad_norm": 0.49460136890411377, + "learning_rate": 4.420263071569706e-06, + "loss": 0.0606842041015625, + "step": 11213 + }, + { + "epoch": 0.7580100040556983, + "grad_norm": 0.3881671130657196, + "learning_rate": 4.417924312909143e-06, + "loss": 0.06465530395507812, + "step": 11214 + }, + { + "epoch": 0.7580775990266324, + "grad_norm": 1.3898411989212036, + "learning_rate": 4.415586066275215e-06, + "loss": 0.1667327880859375, + "step": 11215 + }, + { + "epoch": 0.7581451939975666, + "grad_norm": 0.6475092172622681, + "learning_rate": 4.4132483317810555e-06, + "loss": 0.122161865234375, + "step": 11216 + }, + { + "epoch": 0.7582127889685008, + "grad_norm": 0.9815313816070557, + "learning_rate": 4.410911109539781e-06, + "loss": 0.188568115234375, + "step": 11217 + }, + { + "epoch": 0.7582803839394349, + "grad_norm": 1.0524834394454956, + "learning_rate": 4.408574399664482e-06, + "loss": 0.1231536865234375, + "step": 11218 + }, + { + "epoch": 0.7583479789103691, + "grad_norm": 0.9130373001098633, + "learning_rate": 4.406238202268218e-06, + "loss": 0.11774826049804688, + "step": 11219 + }, + { + "epoch": 0.7584155738813032, + "grad_norm": 0.6254651546478271, + "learning_rate": 4.403902517464033e-06, + "loss": 0.10355854034423828, + "step": 11220 + }, + { + "epoch": 0.7584831688522374, + "grad_norm": 0.602384090423584, + "learning_rate": 4.40156734536494e-06, + "loss": 0.1389312744140625, + "step": 11221 + }, + { + "epoch": 0.7585507638231715, + "grad_norm": 0.5627150535583496, + "learning_rate": 4.3992326860839295e-06, + "loss": 0.1053009033203125, + "step": 11222 + }, + { + "epoch": 0.7586183587941058, + "grad_norm": 0.3548873960971832, + "learning_rate": 4.3968985397339635e-06, + "loss": 0.0766754150390625, + "step": 11223 + }, + { + "epoch": 0.7586859537650399, + "grad_norm": 0.9649012684822083, + "learning_rate": 4.3945649064279945e-06, + "loss": 0.21630859375, + "step": 11224 + }, + { + "epoch": 0.758753548735974, + "grad_norm": 0.5030504465103149, + "learning_rate": 4.392231786278921e-06, + "loss": 0.08905029296875, + "step": 11225 + }, + { + "epoch": 0.7588211437069082, + "grad_norm": 0.3062540888786316, + "learning_rate": 4.389899179399651e-06, + "loss": 0.06122589111328125, + "step": 11226 + }, + { + "epoch": 0.7588887386778423, + "grad_norm": 0.9987831711769104, + "learning_rate": 4.387567085903035e-06, + "loss": 0.21221923828125, + "step": 11227 + }, + { + "epoch": 0.7589563336487766, + "grad_norm": 0.9894514679908752, + "learning_rate": 4.3852355059019235e-06, + "loss": 0.16335296630859375, + "step": 11228 + }, + { + "epoch": 0.7590239286197107, + "grad_norm": 1.0881553888320923, + "learning_rate": 4.382904439509133e-06, + "loss": 0.1754150390625, + "step": 11229 + }, + { + "epoch": 0.7590915235906448, + "grad_norm": 0.8533079624176025, + "learning_rate": 4.380573886837451e-06, + "loss": 0.13932418823242188, + "step": 11230 + }, + { + "epoch": 0.759159118561579, + "grad_norm": 0.36246204376220703, + "learning_rate": 4.3782438479996465e-06, + "loss": 0.07027435302734375, + "step": 11231 + }, + { + "epoch": 0.7592267135325131, + "grad_norm": 0.7194255590438843, + "learning_rate": 4.375914323108461e-06, + "loss": 0.10364723205566406, + "step": 11232 + }, + { + "epoch": 0.7592943085034474, + "grad_norm": 0.31004270911216736, + "learning_rate": 4.373585312276611e-06, + "loss": 0.06313323974609375, + "step": 11233 + }, + { + "epoch": 0.7593619034743815, + "grad_norm": 0.747821569442749, + "learning_rate": 4.37125681561679e-06, + "loss": 0.125274658203125, + "step": 11234 + }, + { + "epoch": 0.7594294984453157, + "grad_norm": 0.32999318838119507, + "learning_rate": 4.368928833241661e-06, + "loss": 0.04802703857421875, + "step": 11235 + }, + { + "epoch": 0.7594970934162498, + "grad_norm": 0.7929580211639404, + "learning_rate": 4.366601365263865e-06, + "loss": 0.157867431640625, + "step": 11236 + }, + { + "epoch": 0.7595646883871839, + "grad_norm": 0.7643894553184509, + "learning_rate": 4.364274411796032e-06, + "loss": 0.1458587646484375, + "step": 11237 + }, + { + "epoch": 0.7596322833581182, + "grad_norm": 0.4770510494709015, + "learning_rate": 4.361947972950739e-06, + "loss": 0.06360340118408203, + "step": 11238 + }, + { + "epoch": 0.7596998783290523, + "grad_norm": 0.6911308765411377, + "learning_rate": 4.359622048840563e-06, + "loss": 0.12390899658203125, + "step": 11239 + }, + { + "epoch": 0.7597674732999865, + "grad_norm": 0.26595765352249146, + "learning_rate": 4.357296639578045e-06, + "loss": 0.05347442626953125, + "step": 11240 + }, + { + "epoch": 0.7598350682709206, + "grad_norm": 0.28348836302757263, + "learning_rate": 4.354971745275702e-06, + "loss": 0.05517578125, + "step": 11241 + }, + { + "epoch": 0.7599026632418548, + "grad_norm": 0.5260562300682068, + "learning_rate": 4.352647366046025e-06, + "loss": 0.081085205078125, + "step": 11242 + }, + { + "epoch": 0.759970258212789, + "grad_norm": 0.7910944223403931, + "learning_rate": 4.350323502001486e-06, + "loss": 0.11110687255859375, + "step": 11243 + }, + { + "epoch": 0.7600378531837231, + "grad_norm": 1.4332658052444458, + "learning_rate": 4.348000153254523e-06, + "loss": 0.1663360595703125, + "step": 11244 + }, + { + "epoch": 0.7601054481546573, + "grad_norm": 0.2866545617580414, + "learning_rate": 4.345677319917559e-06, + "loss": 0.04070854187011719, + "step": 11245 + }, + { + "epoch": 0.7601730431255914, + "grad_norm": 0.26194003224372864, + "learning_rate": 4.343355002102984e-06, + "loss": 0.03167915344238281, + "step": 11246 + }, + { + "epoch": 0.7602406380965256, + "grad_norm": 0.6690056324005127, + "learning_rate": 4.341033199923165e-06, + "loss": 0.1145782470703125, + "step": 11247 + }, + { + "epoch": 0.7603082330674598, + "grad_norm": 2.3005521297454834, + "learning_rate": 4.338711913490451e-06, + "loss": 0.1010589599609375, + "step": 11248 + }, + { + "epoch": 0.760375828038394, + "grad_norm": 0.7421447038650513, + "learning_rate": 4.3363911429171494e-06, + "loss": 0.13690948486328125, + "step": 11249 + }, + { + "epoch": 0.7604434230093281, + "grad_norm": 1.145377278327942, + "learning_rate": 4.33407088831557e-06, + "loss": 0.11719512939453125, + "step": 11250 + }, + { + "epoch": 0.7605110179802622, + "grad_norm": 1.0068025588989258, + "learning_rate": 4.331751149797963e-06, + "loss": 0.1294097900390625, + "step": 11251 + }, + { + "epoch": 0.7605786129511964, + "grad_norm": 0.3289932310581207, + "learning_rate": 4.329431927476585e-06, + "loss": 0.04679298400878906, + "step": 11252 + }, + { + "epoch": 0.7606462079221306, + "grad_norm": 0.3513255715370178, + "learning_rate": 4.32711322146365e-06, + "loss": 0.04780769348144531, + "step": 11253 + }, + { + "epoch": 0.7607138028930648, + "grad_norm": 0.42056190967559814, + "learning_rate": 4.32479503187135e-06, + "loss": 0.08933258056640625, + "step": 11254 + }, + { + "epoch": 0.7607813978639989, + "grad_norm": 0.21943148970603943, + "learning_rate": 4.322477358811861e-06, + "loss": 0.02927398681640625, + "step": 11255 + }, + { + "epoch": 0.7608489928349331, + "grad_norm": 1.3142820596694946, + "learning_rate": 4.32016020239731e-06, + "loss": 0.1620635986328125, + "step": 11256 + }, + { + "epoch": 0.7609165878058672, + "grad_norm": 0.4602873623371124, + "learning_rate": 4.31784356273983e-06, + "loss": 0.09722900390625, + "step": 11257 + }, + { + "epoch": 0.7609841827768014, + "grad_norm": 0.30329790711402893, + "learning_rate": 4.3155274399515125e-06, + "loss": 0.04193878173828125, + "step": 11258 + }, + { + "epoch": 0.7610517777477356, + "grad_norm": 0.9026622176170349, + "learning_rate": 4.313211834144421e-06, + "loss": 0.1233062744140625, + "step": 11259 + }, + { + "epoch": 0.7611193727186697, + "grad_norm": 0.7482075095176697, + "learning_rate": 4.310896745430602e-06, + "loss": 0.1308746337890625, + "step": 11260 + }, + { + "epoch": 0.7611869676896039, + "grad_norm": 0.276896595954895, + "learning_rate": 4.3085821739220734e-06, + "loss": 0.040184974670410156, + "step": 11261 + }, + { + "epoch": 0.761254562660538, + "grad_norm": 0.370195597410202, + "learning_rate": 4.306268119730823e-06, + "loss": 0.06649398803710938, + "step": 11262 + }, + { + "epoch": 0.7613221576314723, + "grad_norm": 0.6037248969078064, + "learning_rate": 4.303954582968835e-06, + "loss": 0.100433349609375, + "step": 11263 + }, + { + "epoch": 0.7613897526024064, + "grad_norm": 0.9463346600532532, + "learning_rate": 4.3016415637480314e-06, + "loss": 0.1538238525390625, + "step": 11264 + }, + { + "epoch": 0.7614573475733405, + "grad_norm": 1.0257073640823364, + "learning_rate": 4.299329062180349e-06, + "loss": 0.18731689453125, + "step": 11265 + }, + { + "epoch": 0.7615249425442747, + "grad_norm": 0.9787752032279968, + "learning_rate": 4.297017078377675e-06, + "loss": 0.1145477294921875, + "step": 11266 + }, + { + "epoch": 0.7615925375152088, + "grad_norm": 0.9750089049339294, + "learning_rate": 4.294705612451868e-06, + "loss": 0.16278076171875, + "step": 11267 + }, + { + "epoch": 0.7616601324861431, + "grad_norm": 0.2975051701068878, + "learning_rate": 4.2923946645147825e-06, + "loss": 0.05332183837890625, + "step": 11268 + }, + { + "epoch": 0.7617277274570772, + "grad_norm": 1.1019190549850464, + "learning_rate": 4.2900842346782345e-06, + "loss": 0.18017578125, + "step": 11269 + }, + { + "epoch": 0.7617953224280114, + "grad_norm": 0.3368999660015106, + "learning_rate": 4.287774323054015e-06, + "loss": 0.05129241943359375, + "step": 11270 + }, + { + "epoch": 0.7618629173989455, + "grad_norm": 0.6037082076072693, + "learning_rate": 4.285464929753892e-06, + "loss": 0.1090545654296875, + "step": 11271 + }, + { + "epoch": 0.7619305123698796, + "grad_norm": 0.3840063214302063, + "learning_rate": 4.283156054889609e-06, + "loss": 0.058444976806640625, + "step": 11272 + }, + { + "epoch": 0.7619981073408139, + "grad_norm": 0.28837674856185913, + "learning_rate": 4.280847698572884e-06, + "loss": 0.03662872314453125, + "step": 11273 + }, + { + "epoch": 0.762065702311748, + "grad_norm": 0.37234359979629517, + "learning_rate": 4.278539860915407e-06, + "loss": 0.076141357421875, + "step": 11274 + }, + { + "epoch": 0.7621332972826822, + "grad_norm": 2.0419576168060303, + "learning_rate": 4.276232542028845e-06, + "loss": 0.2300567626953125, + "step": 11275 + }, + { + "epoch": 0.7622008922536163, + "grad_norm": 0.5735872983932495, + "learning_rate": 4.273925742024851e-06, + "loss": 0.08587646484375, + "step": 11276 + }, + { + "epoch": 0.7622684872245505, + "grad_norm": 0.9416661858558655, + "learning_rate": 4.2716194610150264e-06, + "loss": 0.185699462890625, + "step": 11277 + }, + { + "epoch": 0.7623360821954847, + "grad_norm": 0.9336414337158203, + "learning_rate": 4.269313699110974e-06, + "loss": 0.155670166015625, + "step": 11278 + }, + { + "epoch": 0.7624036771664188, + "grad_norm": 0.291679710149765, + "learning_rate": 4.267008456424264e-06, + "loss": 0.060821533203125, + "step": 11279 + }, + { + "epoch": 0.762471272137353, + "grad_norm": 0.3040565848350525, + "learning_rate": 4.264703733066424e-06, + "loss": 0.052593231201171875, + "step": 11280 + }, + { + "epoch": 0.7625388671082871, + "grad_norm": 0.4299428164958954, + "learning_rate": 4.262399529148983e-06, + "loss": 0.0644073486328125, + "step": 11281 + }, + { + "epoch": 0.7626064620792213, + "grad_norm": 0.40790215134620667, + "learning_rate": 4.260095844783431e-06, + "loss": 0.07799911499023438, + "step": 11282 + }, + { + "epoch": 0.7626740570501555, + "grad_norm": 1.0887987613677979, + "learning_rate": 4.25779268008123e-06, + "loss": 0.1814727783203125, + "step": 11283 + }, + { + "epoch": 0.7627416520210897, + "grad_norm": 0.29997122287750244, + "learning_rate": 4.255490035153826e-06, + "loss": 0.0594940185546875, + "step": 11284 + }, + { + "epoch": 0.7628092469920238, + "grad_norm": 0.41507992148399353, + "learning_rate": 4.253187910112633e-06, + "loss": 0.08254623413085938, + "step": 11285 + }, + { + "epoch": 0.7628768419629579, + "grad_norm": 0.5691590905189514, + "learning_rate": 4.250886305069038e-06, + "loss": 0.12921142578125, + "step": 11286 + }, + { + "epoch": 0.7629444369338921, + "grad_norm": 0.3261556923389435, + "learning_rate": 4.248585220134423e-06, + "loss": 0.07349395751953125, + "step": 11287 + }, + { + "epoch": 0.7630120319048262, + "grad_norm": 0.9425453543663025, + "learning_rate": 4.246284655420106e-06, + "loss": 0.060451507568359375, + "step": 11288 + }, + { + "epoch": 0.7630796268757605, + "grad_norm": 0.4325556457042694, + "learning_rate": 4.243984611037423e-06, + "loss": 0.07195281982421875, + "step": 11289 + }, + { + "epoch": 0.7631472218466946, + "grad_norm": 0.689395010471344, + "learning_rate": 4.241685087097651e-06, + "loss": 0.11861801147460938, + "step": 11290 + }, + { + "epoch": 0.7632148168176288, + "grad_norm": 0.1961582899093628, + "learning_rate": 4.239386083712055e-06, + "loss": 0.028165817260742188, + "step": 11291 + }, + { + "epoch": 0.7632824117885629, + "grad_norm": 0.5119739174842834, + "learning_rate": 4.23708760099189e-06, + "loss": 0.07984542846679688, + "step": 11292 + }, + { + "epoch": 0.763350006759497, + "grad_norm": 0.1978624016046524, + "learning_rate": 4.23478963904835e-06, + "loss": 0.032012939453125, + "step": 11293 + }, + { + "epoch": 0.7634176017304313, + "grad_norm": 0.37003010511398315, + "learning_rate": 4.232492197992641e-06, + "loss": 0.05145835876464844, + "step": 11294 + }, + { + "epoch": 0.7634851967013654, + "grad_norm": 0.8230168223381042, + "learning_rate": 4.230195277935922e-06, + "loss": 0.1721649169921875, + "step": 11295 + }, + { + "epoch": 0.7635527916722996, + "grad_norm": 1.2414817810058594, + "learning_rate": 4.227898878989332e-06, + "loss": 0.18292236328125, + "step": 11296 + }, + { + "epoch": 0.7636203866432337, + "grad_norm": 0.6315950751304626, + "learning_rate": 4.2256030012639855e-06, + "loss": 0.09637451171875, + "step": 11297 + }, + { + "epoch": 0.763687981614168, + "grad_norm": 1.3332653045654297, + "learning_rate": 4.223307644870971e-06, + "loss": 0.19873046875, + "step": 11298 + }, + { + "epoch": 0.7637555765851021, + "grad_norm": 0.3495306968688965, + "learning_rate": 4.221012809921347e-06, + "loss": 0.05442047119140625, + "step": 11299 + }, + { + "epoch": 0.7638231715560362, + "grad_norm": 1.5189865827560425, + "learning_rate": 4.218718496526167e-06, + "loss": 0.20379638671875, + "step": 11300 + }, + { + "epoch": 0.7638907665269704, + "grad_norm": 0.90889573097229, + "learning_rate": 4.2164247047964285e-06, + "loss": 0.17864990234375, + "step": 11301 + }, + { + "epoch": 0.7639583614979045, + "grad_norm": 0.45586398243904114, + "learning_rate": 4.214131434843127e-06, + "loss": 0.08379364013671875, + "step": 11302 + }, + { + "epoch": 0.7640259564688388, + "grad_norm": 0.45551854372024536, + "learning_rate": 4.211838686777221e-06, + "loss": 0.058528900146484375, + "step": 11303 + }, + { + "epoch": 0.7640935514397729, + "grad_norm": 0.9709056615829468, + "learning_rate": 4.2095464607096455e-06, + "loss": 0.1905517578125, + "step": 11304 + }, + { + "epoch": 0.7641611464107071, + "grad_norm": 0.6839447021484375, + "learning_rate": 4.207254756751329e-06, + "loss": 0.1259307861328125, + "step": 11305 + }, + { + "epoch": 0.7642287413816412, + "grad_norm": 0.3673865497112274, + "learning_rate": 4.204963575013135e-06, + "loss": 0.06714630126953125, + "step": 11306 + }, + { + "epoch": 0.7642963363525753, + "grad_norm": 0.37257441878318787, + "learning_rate": 4.202672915605944e-06, + "loss": 0.0661163330078125, + "step": 11307 + }, + { + "epoch": 0.7643639313235095, + "grad_norm": 0.2775799334049225, + "learning_rate": 4.200382778640583e-06, + "loss": 0.034297943115234375, + "step": 11308 + }, + { + "epoch": 0.7644315262944437, + "grad_norm": 0.21717776358127594, + "learning_rate": 4.198093164227866e-06, + "loss": 0.0354156494140625, + "step": 11309 + }, + { + "epoch": 0.7644991212653779, + "grad_norm": 0.8242968320846558, + "learning_rate": 4.195804072478579e-06, + "loss": 0.140289306640625, + "step": 11310 + }, + { + "epoch": 0.764566716236312, + "grad_norm": 0.6006810069084167, + "learning_rate": 4.19351550350348e-06, + "loss": 0.10540771484375, + "step": 11311 + }, + { + "epoch": 0.7646343112072462, + "grad_norm": 0.4513166844844818, + "learning_rate": 4.191227457413303e-06, + "loss": 0.07790374755859375, + "step": 11312 + }, + { + "epoch": 0.7647019061781803, + "grad_norm": 0.3705326318740845, + "learning_rate": 4.188939934318768e-06, + "loss": 0.068450927734375, + "step": 11313 + }, + { + "epoch": 0.7647695011491145, + "grad_norm": 0.23781204223632812, + "learning_rate": 4.1866529343305485e-06, + "loss": 0.03301048278808594, + "step": 11314 + }, + { + "epoch": 0.7648370961200487, + "grad_norm": 1.3093899488449097, + "learning_rate": 4.184366457559302e-06, + "loss": 0.2044677734375, + "step": 11315 + }, + { + "epoch": 0.7649046910909828, + "grad_norm": 0.9414105415344238, + "learning_rate": 4.182080504115678e-06, + "loss": 0.1826171875, + "step": 11316 + }, + { + "epoch": 0.764972286061917, + "grad_norm": 1.4356807470321655, + "learning_rate": 4.179795074110265e-06, + "loss": 0.211669921875, + "step": 11317 + }, + { + "epoch": 0.7650398810328511, + "grad_norm": 1.0096447467803955, + "learning_rate": 4.177510167653667e-06, + "loss": 0.166107177734375, + "step": 11318 + }, + { + "epoch": 0.7651074760037854, + "grad_norm": 0.6860858798027039, + "learning_rate": 4.175225784856423e-06, + "loss": 0.11691665649414062, + "step": 11319 + }, + { + "epoch": 0.7651750709747195, + "grad_norm": 0.9241011738777161, + "learning_rate": 4.1729419258290765e-06, + "loss": 0.11646270751953125, + "step": 11320 + }, + { + "epoch": 0.7652426659456536, + "grad_norm": 0.1994401067495346, + "learning_rate": 4.170658590682134e-06, + "loss": 0.0341339111328125, + "step": 11321 + }, + { + "epoch": 0.7653102609165878, + "grad_norm": 1.4127068519592285, + "learning_rate": 4.168375779526075e-06, + "loss": 0.16561126708984375, + "step": 11322 + }, + { + "epoch": 0.7653778558875219, + "grad_norm": 0.5759634971618652, + "learning_rate": 4.166093492471357e-06, + "loss": 0.1039886474609375, + "step": 11323 + }, + { + "epoch": 0.7654454508584562, + "grad_norm": 1.2171772718429565, + "learning_rate": 4.1638117296284116e-06, + "loss": 0.13002967834472656, + "step": 11324 + }, + { + "epoch": 0.7655130458293903, + "grad_norm": 0.3077627420425415, + "learning_rate": 4.161530491107643e-06, + "loss": 0.04390716552734375, + "step": 11325 + }, + { + "epoch": 0.7655806408003245, + "grad_norm": 0.3899852931499481, + "learning_rate": 4.159249777019435e-06, + "loss": 0.086273193359375, + "step": 11326 + }, + { + "epoch": 0.7656482357712586, + "grad_norm": 1.2380256652832031, + "learning_rate": 4.1569695874741395e-06, + "loss": 0.2032470703125, + "step": 11327 + }, + { + "epoch": 0.7657158307421927, + "grad_norm": 0.8323485851287842, + "learning_rate": 4.154689922582082e-06, + "loss": 0.137664794921875, + "step": 11328 + }, + { + "epoch": 0.765783425713127, + "grad_norm": 0.6887428164482117, + "learning_rate": 4.152410782453583e-06, + "loss": 0.10888290405273438, + "step": 11329 + }, + { + "epoch": 0.7658510206840611, + "grad_norm": 0.6713868975639343, + "learning_rate": 4.150132167198901e-06, + "loss": 0.10601806640625, + "step": 11330 + }, + { + "epoch": 0.7659186156549953, + "grad_norm": 0.922360897064209, + "learning_rate": 4.147854076928304e-06, + "loss": 0.18328857421875, + "step": 11331 + }, + { + "epoch": 0.7659862106259294, + "grad_norm": 0.4698032736778259, + "learning_rate": 4.145576511752016e-06, + "loss": 0.108642578125, + "step": 11332 + }, + { + "epoch": 0.7660538055968636, + "grad_norm": 0.65046226978302, + "learning_rate": 4.143299471780238e-06, + "loss": 0.1005706787109375, + "step": 11333 + }, + { + "epoch": 0.7661214005677978, + "grad_norm": 0.8335537910461426, + "learning_rate": 4.141022957123149e-06, + "loss": 0.1230926513671875, + "step": 11334 + }, + { + "epoch": 0.7661889955387319, + "grad_norm": 0.4083234965801239, + "learning_rate": 4.138746967890902e-06, + "loss": 0.067230224609375, + "step": 11335 + }, + { + "epoch": 0.7662565905096661, + "grad_norm": 0.5525472164154053, + "learning_rate": 4.13647150419362e-06, + "loss": 0.0978851318359375, + "step": 11336 + }, + { + "epoch": 0.7663241854806002, + "grad_norm": 0.5770962834358215, + "learning_rate": 4.1341965661414085e-06, + "loss": 0.1094970703125, + "step": 11337 + }, + { + "epoch": 0.7663917804515344, + "grad_norm": 0.6985999345779419, + "learning_rate": 4.131922153844338e-06, + "loss": 0.09965896606445312, + "step": 11338 + }, + { + "epoch": 0.7664593754224686, + "grad_norm": 1.1980535984039307, + "learning_rate": 4.1296482674124645e-06, + "loss": 0.192291259765625, + "step": 11339 + }, + { + "epoch": 0.7665269703934028, + "grad_norm": 0.2903289496898651, + "learning_rate": 4.127374906955809e-06, + "loss": 0.05934906005859375, + "step": 11340 + }, + { + "epoch": 0.7665945653643369, + "grad_norm": 0.7723947167396545, + "learning_rate": 4.125102072584365e-06, + "loss": 0.1451568603515625, + "step": 11341 + }, + { + "epoch": 0.766662160335271, + "grad_norm": 2.6362457275390625, + "learning_rate": 4.122829764408124e-06, + "loss": 0.2523193359375, + "step": 11342 + }, + { + "epoch": 0.7667297553062052, + "grad_norm": 0.9411324858665466, + "learning_rate": 4.120557982537014e-06, + "loss": 0.2099609375, + "step": 11343 + }, + { + "epoch": 0.7667973502771394, + "grad_norm": 1.633813738822937, + "learning_rate": 4.118286727080973e-06, + "loss": 0.1619415283203125, + "step": 11344 + }, + { + "epoch": 0.7668649452480736, + "grad_norm": 0.8259834051132202, + "learning_rate": 4.116015998149892e-06, + "loss": 0.1551971435546875, + "step": 11345 + }, + { + "epoch": 0.7669325402190077, + "grad_norm": 0.5949472188949585, + "learning_rate": 4.113745795853643e-06, + "loss": 0.1389312744140625, + "step": 11346 + }, + { + "epoch": 0.7670001351899419, + "grad_norm": 0.73082035779953, + "learning_rate": 4.111476120302073e-06, + "loss": 0.144866943359375, + "step": 11347 + }, + { + "epoch": 0.767067730160876, + "grad_norm": 0.7940864562988281, + "learning_rate": 4.109206971605004e-06, + "loss": 0.106842041015625, + "step": 11348 + }, + { + "epoch": 0.7671353251318102, + "grad_norm": 0.2313947081565857, + "learning_rate": 4.106938349872229e-06, + "loss": 0.040134429931640625, + "step": 11349 + }, + { + "epoch": 0.7672029201027444, + "grad_norm": 0.2969278395175934, + "learning_rate": 4.1046702552135235e-06, + "loss": 0.057159423828125, + "step": 11350 + }, + { + "epoch": 0.7672705150736785, + "grad_norm": 0.8698536157608032, + "learning_rate": 4.1024026877386254e-06, + "loss": 0.11668777465820312, + "step": 11351 + }, + { + "epoch": 0.7673381100446127, + "grad_norm": 0.5586618781089783, + "learning_rate": 4.100135647557258e-06, + "loss": 0.1105499267578125, + "step": 11352 + }, + { + "epoch": 0.7674057050155468, + "grad_norm": 0.45200544595718384, + "learning_rate": 4.097869134779114e-06, + "loss": 0.06683731079101562, + "step": 11353 + }, + { + "epoch": 0.7674732999864811, + "grad_norm": 0.530935525894165, + "learning_rate": 4.095603149513856e-06, + "loss": 0.08657073974609375, + "step": 11354 + }, + { + "epoch": 0.7675408949574152, + "grad_norm": 1.1356291770935059, + "learning_rate": 4.09333769187114e-06, + "loss": 0.174591064453125, + "step": 11355 + }, + { + "epoch": 0.7676084899283493, + "grad_norm": 0.83822101354599, + "learning_rate": 4.091072761960566e-06, + "loss": 0.1364593505859375, + "step": 11356 + }, + { + "epoch": 0.7676760848992835, + "grad_norm": 0.42901748418807983, + "learning_rate": 4.088808359891737e-06, + "loss": 0.08263397216796875, + "step": 11357 + }, + { + "epoch": 0.7677436798702176, + "grad_norm": 0.5159794688224792, + "learning_rate": 4.086544485774215e-06, + "loss": 0.05992698669433594, + "step": 11358 + }, + { + "epoch": 0.7678112748411519, + "grad_norm": 0.9929391741752625, + "learning_rate": 4.084281139717543e-06, + "loss": 0.11345672607421875, + "step": 11359 + }, + { + "epoch": 0.767878869812086, + "grad_norm": 0.5475705862045288, + "learning_rate": 4.082018321831232e-06, + "loss": 0.0853424072265625, + "step": 11360 + }, + { + "epoch": 0.7679464647830201, + "grad_norm": 0.7718577980995178, + "learning_rate": 4.079756032224774e-06, + "loss": 0.13031005859375, + "step": 11361 + }, + { + "epoch": 0.7680140597539543, + "grad_norm": 0.2057192623615265, + "learning_rate": 4.0774942710076305e-06, + "loss": 0.041820526123046875, + "step": 11362 + }, + { + "epoch": 0.7680816547248884, + "grad_norm": 0.9778227806091309, + "learning_rate": 4.07523303828924e-06, + "loss": 0.14038848876953125, + "step": 11363 + }, + { + "epoch": 0.7681492496958227, + "grad_norm": 1.565314531326294, + "learning_rate": 4.072972334179017e-06, + "loss": 0.283447265625, + "step": 11364 + }, + { + "epoch": 0.7682168446667568, + "grad_norm": 0.7858806848526001, + "learning_rate": 4.070712158786346e-06, + "loss": 0.1024169921875, + "step": 11365 + }, + { + "epoch": 0.768284439637691, + "grad_norm": 0.4693056643009186, + "learning_rate": 4.068452512220589e-06, + "loss": 0.0678863525390625, + "step": 11366 + }, + { + "epoch": 0.7683520346086251, + "grad_norm": 0.4629943072795868, + "learning_rate": 4.066193394591079e-06, + "loss": 0.0949859619140625, + "step": 11367 + }, + { + "epoch": 0.7684196295795592, + "grad_norm": 0.7041587233543396, + "learning_rate": 4.063934806007137e-06, + "loss": 0.1155853271484375, + "step": 11368 + }, + { + "epoch": 0.7684872245504935, + "grad_norm": 0.39653480052948, + "learning_rate": 4.061676746578029e-06, + "loss": 0.06912994384765625, + "step": 11369 + }, + { + "epoch": 0.7685548195214276, + "grad_norm": 0.24304597079753876, + "learning_rate": 4.059419216413031e-06, + "loss": 0.03521728515625, + "step": 11370 + }, + { + "epoch": 0.7686224144923618, + "grad_norm": 0.2435053139925003, + "learning_rate": 4.057162215621375e-06, + "loss": 0.040615081787109375, + "step": 11371 + }, + { + "epoch": 0.7686900094632959, + "grad_norm": 0.4823057949542999, + "learning_rate": 4.054905744312255e-06, + "loss": 0.11020660400390625, + "step": 11372 + }, + { + "epoch": 0.7687576044342301, + "grad_norm": 0.6927374601364136, + "learning_rate": 4.052649802594865e-06, + "loss": 0.13311767578125, + "step": 11373 + }, + { + "epoch": 0.7688251994051643, + "grad_norm": 0.26981937885284424, + "learning_rate": 4.050394390578359e-06, + "loss": 0.04715919494628906, + "step": 11374 + }, + { + "epoch": 0.7688927943760984, + "grad_norm": 1.0234030485153198, + "learning_rate": 4.048139508371869e-06, + "loss": 0.1478271484375, + "step": 11375 + }, + { + "epoch": 0.7689603893470326, + "grad_norm": 1.0649882555007935, + "learning_rate": 4.045885156084495e-06, + "loss": 0.147735595703125, + "step": 11376 + }, + { + "epoch": 0.7690279843179667, + "grad_norm": 0.39820700883865356, + "learning_rate": 4.043631333825322e-06, + "loss": 0.06402587890625, + "step": 11377 + }, + { + "epoch": 0.7690955792889009, + "grad_norm": 0.8310366272926331, + "learning_rate": 4.041378041703398e-06, + "loss": 0.1098785400390625, + "step": 11378 + }, + { + "epoch": 0.769163174259835, + "grad_norm": 0.9830165505409241, + "learning_rate": 4.039125279827764e-06, + "loss": 0.1732025146484375, + "step": 11379 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 0.9352400302886963, + "learning_rate": 4.036873048307405e-06, + "loss": 0.177886962890625, + "step": 11380 + }, + { + "epoch": 0.7692983642017034, + "grad_norm": 0.23517689108848572, + "learning_rate": 4.034621347251315e-06, + "loss": 0.031097412109375, + "step": 11381 + }, + { + "epoch": 0.7693659591726375, + "grad_norm": 0.2667534053325653, + "learning_rate": 4.032370176768429e-06, + "loss": 0.03797626495361328, + "step": 11382 + }, + { + "epoch": 0.7694335541435717, + "grad_norm": 0.8809168934822083, + "learning_rate": 4.0301195369676855e-06, + "loss": 0.158935546875, + "step": 11383 + }, + { + "epoch": 0.7695011491145058, + "grad_norm": 0.4368557333946228, + "learning_rate": 4.027869427957983e-06, + "loss": 0.0782318115234375, + "step": 11384 + }, + { + "epoch": 0.7695687440854401, + "grad_norm": 0.531822919845581, + "learning_rate": 4.025619849848184e-06, + "loss": 0.091339111328125, + "step": 11385 + }, + { + "epoch": 0.7696363390563742, + "grad_norm": 0.3157723844051361, + "learning_rate": 4.023370802747151e-06, + "loss": 0.045318603515625, + "step": 11386 + }, + { + "epoch": 0.7697039340273084, + "grad_norm": 0.9240117073059082, + "learning_rate": 4.0211222867637e-06, + "loss": 0.10651397705078125, + "step": 11387 + }, + { + "epoch": 0.7697715289982425, + "grad_norm": 1.0503509044647217, + "learning_rate": 4.018874302006631e-06, + "loss": 0.16878509521484375, + "step": 11388 + }, + { + "epoch": 0.7698391239691766, + "grad_norm": 0.7991052269935608, + "learning_rate": 4.016626848584713e-06, + "loss": 0.1640777587890625, + "step": 11389 + }, + { + "epoch": 0.7699067189401109, + "grad_norm": 0.22635389864444733, + "learning_rate": 4.014379926606694e-06, + "loss": 0.036396026611328125, + "step": 11390 + }, + { + "epoch": 0.769974313911045, + "grad_norm": 0.8568786978721619, + "learning_rate": 4.0121335361812885e-06, + "loss": 0.161712646484375, + "step": 11391 + }, + { + "epoch": 0.7700419088819792, + "grad_norm": 0.18934117257595062, + "learning_rate": 4.009887677417203e-06, + "loss": 0.028446197509765625, + "step": 11392 + }, + { + "epoch": 0.7701095038529133, + "grad_norm": 1.14443039894104, + "learning_rate": 4.0076423504230905e-06, + "loss": 0.13780975341796875, + "step": 11393 + }, + { + "epoch": 0.7701770988238476, + "grad_norm": 0.9933751821517944, + "learning_rate": 4.005397555307611e-06, + "loss": 0.11124515533447266, + "step": 11394 + }, + { + "epoch": 0.7702446937947817, + "grad_norm": 0.8507648706436157, + "learning_rate": 4.003153292179369e-06, + "loss": 0.16996002197265625, + "step": 11395 + }, + { + "epoch": 0.7703122887657158, + "grad_norm": 0.45761677622795105, + "learning_rate": 4.000909561146953e-06, + "loss": 0.048458099365234375, + "step": 11396 + }, + { + "epoch": 0.77037988373665, + "grad_norm": 0.8726421594619751, + "learning_rate": 3.998666362318945e-06, + "loss": 0.14434814453125, + "step": 11397 + }, + { + "epoch": 0.7704474787075841, + "grad_norm": 0.9318409562110901, + "learning_rate": 3.996423695803864e-06, + "loss": 0.12834930419921875, + "step": 11398 + }, + { + "epoch": 0.7705150736785183, + "grad_norm": 0.9738031625747681, + "learning_rate": 3.994181561710243e-06, + "loss": 0.14754867553710938, + "step": 11399 + }, + { + "epoch": 0.7705826686494525, + "grad_norm": 0.686907947063446, + "learning_rate": 3.9919399601465595e-06, + "loss": 0.13526153564453125, + "step": 11400 + }, + { + "epoch": 0.7706502636203867, + "grad_norm": 0.38260143995285034, + "learning_rate": 3.989698891221281e-06, + "loss": 0.09409332275390625, + "step": 11401 + }, + { + "epoch": 0.7707178585913208, + "grad_norm": 0.7949209213256836, + "learning_rate": 3.987458355042841e-06, + "loss": 0.1312255859375, + "step": 11402 + }, + { + "epoch": 0.7707854535622549, + "grad_norm": 0.3963637948036194, + "learning_rate": 3.9852183517196525e-06, + "loss": 0.09000396728515625, + "step": 11403 + }, + { + "epoch": 0.7708530485331891, + "grad_norm": 0.593038022518158, + "learning_rate": 3.982978881360097e-06, + "loss": 0.09410476684570312, + "step": 11404 + }, + { + "epoch": 0.7709206435041233, + "grad_norm": 0.6815799474716187, + "learning_rate": 3.980739944072546e-06, + "loss": 0.093475341796875, + "step": 11405 + }, + { + "epoch": 0.7709882384750575, + "grad_norm": 0.12810395658016205, + "learning_rate": 3.9785015399653184e-06, + "loss": 0.012843132019042969, + "step": 11406 + }, + { + "epoch": 0.7710558334459916, + "grad_norm": 0.5326944589614868, + "learning_rate": 3.9762636691467245e-06, + "loss": 0.07604217529296875, + "step": 11407 + }, + { + "epoch": 0.7711234284169258, + "grad_norm": 0.8440869450569153, + "learning_rate": 3.9740263317250605e-06, + "loss": 0.142730712890625, + "step": 11408 + }, + { + "epoch": 0.7711910233878599, + "grad_norm": 1.4816533327102661, + "learning_rate": 3.971789527808562e-06, + "loss": 0.19808197021484375, + "step": 11409 + }, + { + "epoch": 0.7712586183587941, + "grad_norm": 0.6253171563148499, + "learning_rate": 3.96955325750548e-06, + "loss": 0.110809326171875, + "step": 11410 + }, + { + "epoch": 0.7713262133297283, + "grad_norm": 1.3241291046142578, + "learning_rate": 3.967317520923998e-06, + "loss": 0.210723876953125, + "step": 11411 + }, + { + "epoch": 0.7713938083006624, + "grad_norm": 0.9118282198905945, + "learning_rate": 3.965082318172312e-06, + "loss": 0.12653350830078125, + "step": 11412 + }, + { + "epoch": 0.7714614032715966, + "grad_norm": 0.8349437117576599, + "learning_rate": 3.962847649358569e-06, + "loss": 0.11684036254882812, + "step": 11413 + }, + { + "epoch": 0.7715289982425307, + "grad_norm": 0.8172737956047058, + "learning_rate": 3.960613514590896e-06, + "loss": 0.144561767578125, + "step": 11414 + }, + { + "epoch": 0.771596593213465, + "grad_norm": 0.7021103501319885, + "learning_rate": 3.9583799139773925e-06, + "loss": 0.1231689453125, + "step": 11415 + }, + { + "epoch": 0.7716641881843991, + "grad_norm": 0.7327991724014282, + "learning_rate": 3.956146847626138e-06, + "loss": 0.08763313293457031, + "step": 11416 + }, + { + "epoch": 0.7717317831553332, + "grad_norm": 0.6751159429550171, + "learning_rate": 3.953914315645175e-06, + "loss": 0.1175384521484375, + "step": 11417 + }, + { + "epoch": 0.7717993781262674, + "grad_norm": 0.48835161328315735, + "learning_rate": 3.951682318142541e-06, + "loss": 0.071990966796875, + "step": 11418 + }, + { + "epoch": 0.7718669730972015, + "grad_norm": 0.5643985271453857, + "learning_rate": 3.949450855226221e-06, + "loss": 0.0998077392578125, + "step": 11419 + }, + { + "epoch": 0.7719345680681358, + "grad_norm": 0.6537798643112183, + "learning_rate": 3.947219927004186e-06, + "loss": 0.0999298095703125, + "step": 11420 + }, + { + "epoch": 0.7720021630390699, + "grad_norm": 0.422637403011322, + "learning_rate": 3.944989533584398e-06, + "loss": 0.0797119140625, + "step": 11421 + }, + { + "epoch": 0.7720697580100041, + "grad_norm": 0.18545222282409668, + "learning_rate": 3.942759675074755e-06, + "loss": 0.028776168823242188, + "step": 11422 + }, + { + "epoch": 0.7721373529809382, + "grad_norm": 0.5389845371246338, + "learning_rate": 3.940530351583169e-06, + "loss": 0.101806640625, + "step": 11423 + }, + { + "epoch": 0.7722049479518723, + "grad_norm": 1.0479904413223267, + "learning_rate": 3.9383015632175036e-06, + "loss": 0.13911819458007812, + "step": 11424 + }, + { + "epoch": 0.7722725429228066, + "grad_norm": 1.1288235187530518, + "learning_rate": 3.9360733100856e-06, + "loss": 0.1187896728515625, + "step": 11425 + }, + { + "epoch": 0.7723401378937407, + "grad_norm": 0.3299504518508911, + "learning_rate": 3.933845592295276e-06, + "loss": 0.049530029296875, + "step": 11426 + }, + { + "epoch": 0.7724077328646749, + "grad_norm": 0.8216418623924255, + "learning_rate": 3.9316184099543215e-06, + "loss": 0.164031982421875, + "step": 11427 + }, + { + "epoch": 0.772475327835609, + "grad_norm": 0.7765181660652161, + "learning_rate": 3.929391763170501e-06, + "loss": 0.166412353515625, + "step": 11428 + }, + { + "epoch": 0.7725429228065432, + "grad_norm": 0.9660146236419678, + "learning_rate": 3.9271656520515544e-06, + "loss": 0.12622833251953125, + "step": 11429 + }, + { + "epoch": 0.7726105177774774, + "grad_norm": 0.546200156211853, + "learning_rate": 3.924940076705196e-06, + "loss": 0.0886688232421875, + "step": 11430 + }, + { + "epoch": 0.7726781127484115, + "grad_norm": 0.5625902414321899, + "learning_rate": 3.9227150372391114e-06, + "loss": 0.10111236572265625, + "step": 11431 + }, + { + "epoch": 0.7727457077193457, + "grad_norm": 1.019885778427124, + "learning_rate": 3.920490533760962e-06, + "loss": 0.13378143310546875, + "step": 11432 + }, + { + "epoch": 0.7728133026902798, + "grad_norm": 0.4414369761943817, + "learning_rate": 3.918266566378379e-06, + "loss": 0.070159912109375, + "step": 11433 + }, + { + "epoch": 0.772880897661214, + "grad_norm": 0.9160630106925964, + "learning_rate": 3.916043135198985e-06, + "loss": 0.141082763671875, + "step": 11434 + }, + { + "epoch": 0.7729484926321482, + "grad_norm": 0.8900964260101318, + "learning_rate": 3.913820240330346e-06, + "loss": 0.1157684326171875, + "step": 11435 + }, + { + "epoch": 0.7730160876030824, + "grad_norm": 0.31842178106307983, + "learning_rate": 3.911597881880032e-06, + "loss": 0.037776947021484375, + "step": 11436 + }, + { + "epoch": 0.7730836825740165, + "grad_norm": 0.501693606376648, + "learning_rate": 3.90937605995557e-06, + "loss": 0.07555389404296875, + "step": 11437 + }, + { + "epoch": 0.7731512775449506, + "grad_norm": 0.9508317708969116, + "learning_rate": 3.907154774664466e-06, + "loss": 0.16363525390625, + "step": 11438 + }, + { + "epoch": 0.7732188725158848, + "grad_norm": 0.29400351643562317, + "learning_rate": 3.9049340261142004e-06, + "loss": 0.0412139892578125, + "step": 11439 + }, + { + "epoch": 0.773286467486819, + "grad_norm": 0.15941929817199707, + "learning_rate": 3.902713814412226e-06, + "loss": 0.027286529541015625, + "step": 11440 + }, + { + "epoch": 0.7733540624577532, + "grad_norm": 2.04015851020813, + "learning_rate": 3.9004941396659705e-06, + "loss": 0.14150428771972656, + "step": 11441 + }, + { + "epoch": 0.7734216574286873, + "grad_norm": 0.23409244418144226, + "learning_rate": 3.898275001982834e-06, + "loss": 0.039737701416015625, + "step": 11442 + }, + { + "epoch": 0.7734892523996215, + "grad_norm": 0.1896798014640808, + "learning_rate": 3.896056401470197e-06, + "loss": 0.026749610900878906, + "step": 11443 + }, + { + "epoch": 0.7735568473705556, + "grad_norm": 0.3439699411392212, + "learning_rate": 3.893838338235403e-06, + "loss": 0.08032989501953125, + "step": 11444 + }, + { + "epoch": 0.7736244423414897, + "grad_norm": 1.0996379852294922, + "learning_rate": 3.89162081238578e-06, + "loss": 0.15702056884765625, + "step": 11445 + }, + { + "epoch": 0.773692037312424, + "grad_norm": 0.3618098199367523, + "learning_rate": 3.8894038240286194e-06, + "loss": 0.064910888671875, + "step": 11446 + }, + { + "epoch": 0.7737596322833581, + "grad_norm": 0.7944791913032532, + "learning_rate": 3.887187373271207e-06, + "loss": 0.08646583557128906, + "step": 11447 + }, + { + "epoch": 0.7738272272542923, + "grad_norm": 1.899794101715088, + "learning_rate": 3.884971460220772e-06, + "loss": 0.190093994140625, + "step": 11448 + }, + { + "epoch": 0.7738948222252264, + "grad_norm": 0.3958301544189453, + "learning_rate": 3.8827560849845435e-06, + "loss": 0.084381103515625, + "step": 11449 + }, + { + "epoch": 0.7739624171961607, + "grad_norm": 0.9962709546089172, + "learning_rate": 3.880541247669714e-06, + "loss": 0.1656951904296875, + "step": 11450 + }, + { + "epoch": 0.7740300121670948, + "grad_norm": 0.8874580264091492, + "learning_rate": 3.878326948383449e-06, + "loss": 0.192138671875, + "step": 11451 + }, + { + "epoch": 0.7740976071380289, + "grad_norm": 0.8152357935905457, + "learning_rate": 3.876113187232892e-06, + "loss": 0.1209869384765625, + "step": 11452 + }, + { + "epoch": 0.7741652021089631, + "grad_norm": 0.4632919430732727, + "learning_rate": 3.87389996432516e-06, + "loss": 0.08224868774414062, + "step": 11453 + }, + { + "epoch": 0.7742327970798972, + "grad_norm": 0.6737679839134216, + "learning_rate": 3.871687279767337e-06, + "loss": 0.112213134765625, + "step": 11454 + }, + { + "epoch": 0.7743003920508315, + "grad_norm": 0.4599461853504181, + "learning_rate": 3.8694751336664916e-06, + "loss": 0.054805755615234375, + "step": 11455 + }, + { + "epoch": 0.7743679870217656, + "grad_norm": 0.6895148754119873, + "learning_rate": 3.867263526129658e-06, + "loss": 0.1158294677734375, + "step": 11456 + }, + { + "epoch": 0.7744355819926998, + "grad_norm": 0.9262718558311462, + "learning_rate": 3.8650524572638485e-06, + "loss": 0.10016632080078125, + "step": 11457 + }, + { + "epoch": 0.7745031769636339, + "grad_norm": 0.9431226849555969, + "learning_rate": 3.862841927176049e-06, + "loss": 0.195098876953125, + "step": 11458 + }, + { + "epoch": 0.774570771934568, + "grad_norm": 0.6018277406692505, + "learning_rate": 3.860631935973215e-06, + "loss": 0.12358856201171875, + "step": 11459 + }, + { + "epoch": 0.7746383669055023, + "grad_norm": 0.5612691044807434, + "learning_rate": 3.858422483762291e-06, + "loss": 0.10003662109375, + "step": 11460 + }, + { + "epoch": 0.7747059618764364, + "grad_norm": 0.36443838477134705, + "learning_rate": 3.856213570650167e-06, + "loss": 0.057847023010253906, + "step": 11461 + }, + { + "epoch": 0.7747735568473706, + "grad_norm": 0.7627846598625183, + "learning_rate": 3.854005196743737e-06, + "loss": 0.12583160400390625, + "step": 11462 + }, + { + "epoch": 0.7748411518183047, + "grad_norm": 1.032416582107544, + "learning_rate": 3.8517973621498525e-06, + "loss": 0.204803466796875, + "step": 11463 + }, + { + "epoch": 0.7749087467892389, + "grad_norm": 0.3429111838340759, + "learning_rate": 3.84959006697534e-06, + "loss": 0.06491851806640625, + "step": 11464 + }, + { + "epoch": 0.774976341760173, + "grad_norm": 0.7923491597175598, + "learning_rate": 3.8473833113270055e-06, + "loss": 0.08756256103515625, + "step": 11465 + }, + { + "epoch": 0.7750439367311072, + "grad_norm": 0.2436274290084839, + "learning_rate": 3.845177095311623e-06, + "loss": 0.043460845947265625, + "step": 11466 + }, + { + "epoch": 0.7751115317020414, + "grad_norm": 0.3497614562511444, + "learning_rate": 3.842971419035945e-06, + "loss": 0.07228851318359375, + "step": 11467 + }, + { + "epoch": 0.7751791266729755, + "grad_norm": 0.5670015811920166, + "learning_rate": 3.840766282606693e-06, + "loss": 0.11068344116210938, + "step": 11468 + }, + { + "epoch": 0.7752467216439097, + "grad_norm": 1.0884628295898438, + "learning_rate": 3.838561686130567e-06, + "loss": 0.17462158203125, + "step": 11469 + }, + { + "epoch": 0.7753143166148438, + "grad_norm": 0.3504782021045685, + "learning_rate": 3.836357629714235e-06, + "loss": 0.0684967041015625, + "step": 11470 + }, + { + "epoch": 0.7753819115857781, + "grad_norm": 0.6832392811775208, + "learning_rate": 3.834154113464354e-06, + "loss": 0.1190032958984375, + "step": 11471 + }, + { + "epoch": 0.7754495065567122, + "grad_norm": 1.1885995864868164, + "learning_rate": 3.83195113748753e-06, + "loss": 0.217376708984375, + "step": 11472 + }, + { + "epoch": 0.7755171015276463, + "grad_norm": 0.50079345703125, + "learning_rate": 3.829748701890372e-06, + "loss": 0.06863021850585938, + "step": 11473 + }, + { + "epoch": 0.7755846964985805, + "grad_norm": 0.44697287678718567, + "learning_rate": 3.827546806779429e-06, + "loss": 0.0677337646484375, + "step": 11474 + }, + { + "epoch": 0.7756522914695146, + "grad_norm": 0.26886802911758423, + "learning_rate": 3.825345452261257e-06, + "loss": 0.04010772705078125, + "step": 11475 + }, + { + "epoch": 0.7757198864404489, + "grad_norm": 0.6338828206062317, + "learning_rate": 3.82314463844237e-06, + "loss": 0.0978240966796875, + "step": 11476 + }, + { + "epoch": 0.775787481411383, + "grad_norm": 0.36807817220687866, + "learning_rate": 3.820944365429245e-06, + "loss": 0.05496025085449219, + "step": 11477 + }, + { + "epoch": 0.7758550763823172, + "grad_norm": 0.5625848770141602, + "learning_rate": 3.818744633328359e-06, + "loss": 0.1313934326171875, + "step": 11478 + }, + { + "epoch": 0.7759226713532513, + "grad_norm": 1.1746280193328857, + "learning_rate": 3.816545442246142e-06, + "loss": 0.18603515625, + "step": 11479 + }, + { + "epoch": 0.7759902663241854, + "grad_norm": 0.5198132395744324, + "learning_rate": 3.8143467922890068e-06, + "loss": 0.122283935546875, + "step": 11480 + }, + { + "epoch": 0.7760578612951197, + "grad_norm": 0.6152852177619934, + "learning_rate": 3.812148683563336e-06, + "loss": 0.09145355224609375, + "step": 11481 + }, + { + "epoch": 0.7761254562660538, + "grad_norm": 0.9719131588935852, + "learning_rate": 3.8099511161754884e-06, + "loss": 0.16790771484375, + "step": 11482 + }, + { + "epoch": 0.776193051236988, + "grad_norm": 0.7733492851257324, + "learning_rate": 3.807754090231792e-06, + "loss": 0.1664276123046875, + "step": 11483 + }, + { + "epoch": 0.7762606462079221, + "grad_norm": 0.6902892589569092, + "learning_rate": 3.805557605838565e-06, + "loss": 0.114349365234375, + "step": 11484 + }, + { + "epoch": 0.7763282411788562, + "grad_norm": 0.8470491170883179, + "learning_rate": 3.803361663102069e-06, + "loss": 0.113037109375, + "step": 11485 + }, + { + "epoch": 0.7763958361497905, + "grad_norm": 0.4783841073513031, + "learning_rate": 3.8011662621285757e-06, + "loss": 0.07573986053466797, + "step": 11486 + }, + { + "epoch": 0.7764634311207246, + "grad_norm": 1.037235140800476, + "learning_rate": 3.7989714030242955e-06, + "loss": 0.16426467895507812, + "step": 11487 + }, + { + "epoch": 0.7765310260916588, + "grad_norm": 1.4947630167007446, + "learning_rate": 3.7967770858954413e-06, + "loss": 0.15864944458007812, + "step": 11488 + }, + { + "epoch": 0.7765986210625929, + "grad_norm": 1.4783662557601929, + "learning_rate": 3.7945833108481886e-06, + "loss": 0.17389678955078125, + "step": 11489 + }, + { + "epoch": 0.7766662160335271, + "grad_norm": 0.2187425047159195, + "learning_rate": 3.7923900779886723e-06, + "loss": 0.04595947265625, + "step": 11490 + }, + { + "epoch": 0.7767338110044613, + "grad_norm": 0.32666388154029846, + "learning_rate": 3.7901973874230274e-06, + "loss": 0.04894256591796875, + "step": 11491 + }, + { + "epoch": 0.7768014059753954, + "grad_norm": 1.0878304243087769, + "learning_rate": 3.7880052392573456e-06, + "loss": 0.169097900390625, + "step": 11492 + }, + { + "epoch": 0.7768690009463296, + "grad_norm": 2.076033353805542, + "learning_rate": 3.7858136335976974e-06, + "loss": 0.1645355224609375, + "step": 11493 + }, + { + "epoch": 0.7769365959172637, + "grad_norm": 0.3368071913719177, + "learning_rate": 3.783622570550126e-06, + "loss": 0.06403350830078125, + "step": 11494 + }, + { + "epoch": 0.7770041908881979, + "grad_norm": 1.3952524662017822, + "learning_rate": 3.781432050220647e-06, + "loss": 0.216033935546875, + "step": 11495 + }, + { + "epoch": 0.7770717858591321, + "grad_norm": 0.45250415802001953, + "learning_rate": 3.7792420727152498e-06, + "loss": 0.08842086791992188, + "step": 11496 + }, + { + "epoch": 0.7771393808300663, + "grad_norm": 0.6995298862457275, + "learning_rate": 3.777052638139912e-06, + "loss": 0.09375381469726562, + "step": 11497 + }, + { + "epoch": 0.7772069758010004, + "grad_norm": 0.5332322716712952, + "learning_rate": 3.7748637466005528e-06, + "loss": 0.1055908203125, + "step": 11498 + }, + { + "epoch": 0.7772745707719345, + "grad_norm": 0.607282817363739, + "learning_rate": 3.7726753982030975e-06, + "loss": 0.1111602783203125, + "step": 11499 + }, + { + "epoch": 0.7773421657428687, + "grad_norm": 0.34014642238616943, + "learning_rate": 3.7704875930534346e-06, + "loss": 0.052173614501953125, + "step": 11500 + }, + { + "epoch": 0.7774097607138029, + "grad_norm": 0.8759884834289551, + "learning_rate": 3.7683003312574066e-06, + "loss": 0.14471435546875, + "step": 11501 + }, + { + "epoch": 0.7774773556847371, + "grad_norm": 0.45037028193473816, + "learning_rate": 3.766113612920868e-06, + "loss": 0.048381805419921875, + "step": 11502 + }, + { + "epoch": 0.7775449506556712, + "grad_norm": 0.5105987191200256, + "learning_rate": 3.7639274381496054e-06, + "loss": 0.07298564910888672, + "step": 11503 + }, + { + "epoch": 0.7776125456266054, + "grad_norm": 0.2748209238052368, + "learning_rate": 3.761741807049416e-06, + "loss": 0.0569915771484375, + "step": 11504 + }, + { + "epoch": 0.7776801405975395, + "grad_norm": 0.6109521389007568, + "learning_rate": 3.7595567197260468e-06, + "loss": 0.1098785400390625, + "step": 11505 + }, + { + "epoch": 0.7777477355684737, + "grad_norm": 0.47163844108581543, + "learning_rate": 3.7573721762852266e-06, + "loss": 0.07338142395019531, + "step": 11506 + }, + { + "epoch": 0.7778153305394079, + "grad_norm": 1.4308198690414429, + "learning_rate": 3.755188176832659e-06, + "loss": 0.2051544189453125, + "step": 11507 + }, + { + "epoch": 0.777882925510342, + "grad_norm": 0.2875174880027771, + "learning_rate": 3.7530047214740166e-06, + "loss": 0.04416656494140625, + "step": 11508 + }, + { + "epoch": 0.7779505204812762, + "grad_norm": 0.32324516773223877, + "learning_rate": 3.750821810314948e-06, + "loss": 0.05644989013671875, + "step": 11509 + }, + { + "epoch": 0.7780181154522103, + "grad_norm": 0.764007568359375, + "learning_rate": 3.748639443461085e-06, + "loss": 0.11954498291015625, + "step": 11510 + }, + { + "epoch": 0.7780857104231446, + "grad_norm": 1.1254407167434692, + "learning_rate": 3.7464576210180134e-06, + "loss": 0.12554168701171875, + "step": 11511 + }, + { + "epoch": 0.7781533053940787, + "grad_norm": 1.2958707809448242, + "learning_rate": 3.7442763430913022e-06, + "loss": 0.2036895751953125, + "step": 11512 + }, + { + "epoch": 0.7782209003650128, + "grad_norm": 1.0012359619140625, + "learning_rate": 3.742095609786509e-06, + "loss": 0.13268280029296875, + "step": 11513 + }, + { + "epoch": 0.778288495335947, + "grad_norm": 0.33588066697120667, + "learning_rate": 3.7399154212091336e-06, + "loss": 0.05268096923828125, + "step": 11514 + }, + { + "epoch": 0.7783560903068811, + "grad_norm": 1.3383440971374512, + "learning_rate": 3.737735777464685e-06, + "loss": 0.196258544921875, + "step": 11515 + }, + { + "epoch": 0.7784236852778154, + "grad_norm": 1.1397064924240112, + "learning_rate": 3.7355566786586097e-06, + "loss": 0.176422119140625, + "step": 11516 + }, + { + "epoch": 0.7784912802487495, + "grad_norm": 0.595039427280426, + "learning_rate": 3.733378124896361e-06, + "loss": 0.1345672607421875, + "step": 11517 + }, + { + "epoch": 0.7785588752196837, + "grad_norm": 0.22932562232017517, + "learning_rate": 3.731200116283343e-06, + "loss": 0.049530029296875, + "step": 11518 + }, + { + "epoch": 0.7786264701906178, + "grad_norm": 0.7275869250297546, + "learning_rate": 3.7290226529249433e-06, + "loss": 0.14312744140625, + "step": 11519 + }, + { + "epoch": 0.7786940651615519, + "grad_norm": 0.4572541117668152, + "learning_rate": 3.726845734926522e-06, + "loss": 0.08734893798828125, + "step": 11520 + }, + { + "epoch": 0.7787616601324862, + "grad_norm": 0.26474517583847046, + "learning_rate": 3.724669362393411e-06, + "loss": 0.04206085205078125, + "step": 11521 + }, + { + "epoch": 0.7788292551034203, + "grad_norm": 0.4886459410190582, + "learning_rate": 3.722493535430913e-06, + "loss": 0.0932769775390625, + "step": 11522 + }, + { + "epoch": 0.7788968500743545, + "grad_norm": 0.6509979963302612, + "learning_rate": 3.7203182541443197e-06, + "loss": 0.118011474609375, + "step": 11523 + }, + { + "epoch": 0.7789644450452886, + "grad_norm": 0.676645815372467, + "learning_rate": 3.7181435186388718e-06, + "loss": 0.11651611328125, + "step": 11524 + }, + { + "epoch": 0.7790320400162228, + "grad_norm": 1.3566340208053589, + "learning_rate": 3.7159693290197982e-06, + "loss": 0.1439971923828125, + "step": 11525 + }, + { + "epoch": 0.779099634987157, + "grad_norm": 0.6745683550834656, + "learning_rate": 3.7137956853923116e-06, + "loss": 0.09392547607421875, + "step": 11526 + }, + { + "epoch": 0.7791672299580911, + "grad_norm": 0.3442915976047516, + "learning_rate": 3.7116225878615694e-06, + "loss": 0.0645294189453125, + "step": 11527 + }, + { + "epoch": 0.7792348249290253, + "grad_norm": 0.5363792181015015, + "learning_rate": 3.7094500365327327e-06, + "loss": 0.09164047241210938, + "step": 11528 + }, + { + "epoch": 0.7793024198999594, + "grad_norm": 0.35241812467575073, + "learning_rate": 3.7072780315109172e-06, + "loss": 0.059131622314453125, + "step": 11529 + }, + { + "epoch": 0.7793700148708936, + "grad_norm": 0.6368064880371094, + "learning_rate": 3.705106572901219e-06, + "loss": 0.1157989501953125, + "step": 11530 + }, + { + "epoch": 0.7794376098418277, + "grad_norm": 0.38356542587280273, + "learning_rate": 3.702935660808709e-06, + "loss": 0.06855010986328125, + "step": 11531 + }, + { + "epoch": 0.779505204812762, + "grad_norm": 0.5762079954147339, + "learning_rate": 3.7007652953384263e-06, + "loss": 0.1282501220703125, + "step": 11532 + }, + { + "epoch": 0.7795727997836961, + "grad_norm": 1.1169259548187256, + "learning_rate": 3.6985954765953872e-06, + "loss": 0.1626739501953125, + "step": 11533 + }, + { + "epoch": 0.7796403947546302, + "grad_norm": 0.5432459712028503, + "learning_rate": 3.6964262046845827e-06, + "loss": 0.105865478515625, + "step": 11534 + }, + { + "epoch": 0.7797079897255644, + "grad_norm": 0.4453165531158447, + "learning_rate": 3.6942574797109736e-06, + "loss": 0.0853424072265625, + "step": 11535 + }, + { + "epoch": 0.7797755846964985, + "grad_norm": 0.8537445664405823, + "learning_rate": 3.6920893017794976e-06, + "loss": 0.1422271728515625, + "step": 11536 + }, + { + "epoch": 0.7798431796674328, + "grad_norm": 0.9761715531349182, + "learning_rate": 3.6899216709950643e-06, + "loss": 0.1815948486328125, + "step": 11537 + }, + { + "epoch": 0.7799107746383669, + "grad_norm": 0.7657521367073059, + "learning_rate": 3.6877545874625535e-06, + "loss": 0.1372222900390625, + "step": 11538 + }, + { + "epoch": 0.7799783696093011, + "grad_norm": 0.35034048557281494, + "learning_rate": 3.685588051286834e-06, + "loss": 0.041561126708984375, + "step": 11539 + }, + { + "epoch": 0.7800459645802352, + "grad_norm": 0.6722325682640076, + "learning_rate": 3.6834220625727204e-06, + "loss": 0.10401153564453125, + "step": 11540 + }, + { + "epoch": 0.7801135595511693, + "grad_norm": 0.9015921354293823, + "learning_rate": 3.6812566214250275e-06, + "loss": 0.129852294921875, + "step": 11541 + }, + { + "epoch": 0.7801811545221036, + "grad_norm": 0.667746365070343, + "learning_rate": 3.6790917279485297e-06, + "loss": 0.09111785888671875, + "step": 11542 + }, + { + "epoch": 0.7802487494930377, + "grad_norm": 0.4925486445426941, + "learning_rate": 3.67692738224798e-06, + "loss": 0.08587646484375, + "step": 11543 + }, + { + "epoch": 0.7803163444639719, + "grad_norm": 0.9941389560699463, + "learning_rate": 3.674763584428099e-06, + "loss": 0.1937408447265625, + "step": 11544 + }, + { + "epoch": 0.780383939434906, + "grad_norm": 1.2668129205703735, + "learning_rate": 3.672600334593587e-06, + "loss": 0.240936279296875, + "step": 11545 + }, + { + "epoch": 0.7804515344058403, + "grad_norm": 0.35263633728027344, + "learning_rate": 3.670437632849115e-06, + "loss": 0.07135009765625, + "step": 11546 + }, + { + "epoch": 0.7805191293767744, + "grad_norm": 0.5214504599571228, + "learning_rate": 3.6682754792993294e-06, + "loss": 0.1222381591796875, + "step": 11547 + }, + { + "epoch": 0.7805867243477085, + "grad_norm": 0.43265262246131897, + "learning_rate": 3.6661138740488463e-06, + "loss": 0.06644439697265625, + "step": 11548 + }, + { + "epoch": 0.7806543193186427, + "grad_norm": 0.8643597960472107, + "learning_rate": 3.663952817202259e-06, + "loss": 0.109130859375, + "step": 11549 + }, + { + "epoch": 0.7807219142895768, + "grad_norm": 0.5490498542785645, + "learning_rate": 3.661792308864132e-06, + "loss": 0.0940704345703125, + "step": 11550 + }, + { + "epoch": 0.780789509260511, + "grad_norm": 0.8890361785888672, + "learning_rate": 3.6596323491390015e-06, + "loss": 0.13297271728515625, + "step": 11551 + }, + { + "epoch": 0.7808571042314452, + "grad_norm": 0.6729894280433655, + "learning_rate": 3.657472938131391e-06, + "loss": 0.120086669921875, + "step": 11552 + }, + { + "epoch": 0.7809246992023794, + "grad_norm": 0.650873601436615, + "learning_rate": 3.655314075945768e-06, + "loss": 0.1321563720703125, + "step": 11553 + }, + { + "epoch": 0.7809922941733135, + "grad_norm": 0.7928658723831177, + "learning_rate": 3.653155762686608e-06, + "loss": 0.10979843139648438, + "step": 11554 + }, + { + "epoch": 0.7810598891442476, + "grad_norm": 0.5320805907249451, + "learning_rate": 3.6509979984583367e-06, + "loss": 0.09234619140625, + "step": 11555 + }, + { + "epoch": 0.7811274841151818, + "grad_norm": 0.6494277119636536, + "learning_rate": 3.6488407833653612e-06, + "loss": 0.1278839111328125, + "step": 11556 + }, + { + "epoch": 0.781195079086116, + "grad_norm": 0.8857519626617432, + "learning_rate": 3.6466841175120615e-06, + "loss": 0.18115234375, + "step": 11557 + }, + { + "epoch": 0.7812626740570502, + "grad_norm": 0.7530543208122253, + "learning_rate": 3.6445280010027883e-06, + "loss": 0.1306915283203125, + "step": 11558 + }, + { + "epoch": 0.7813302690279843, + "grad_norm": 1.0853723287582397, + "learning_rate": 3.64237243394187e-06, + "loss": 0.177154541015625, + "step": 11559 + }, + { + "epoch": 0.7813978639989185, + "grad_norm": 0.2741803228855133, + "learning_rate": 3.6402174164336054e-06, + "loss": 0.044281959533691406, + "step": 11560 + }, + { + "epoch": 0.7814654589698526, + "grad_norm": 1.4554853439331055, + "learning_rate": 3.6380629485822676e-06, + "loss": 0.1735076904296875, + "step": 11561 + }, + { + "epoch": 0.7815330539407868, + "grad_norm": 0.40390151739120483, + "learning_rate": 3.6359090304921013e-06, + "loss": 0.07007980346679688, + "step": 11562 + }, + { + "epoch": 0.781600648911721, + "grad_norm": 0.4296080470085144, + "learning_rate": 3.6337556622673348e-06, + "loss": 0.089202880859375, + "step": 11563 + }, + { + "epoch": 0.7816682438826551, + "grad_norm": 0.8603362441062927, + "learning_rate": 3.631602844012148e-06, + "loss": 0.147308349609375, + "step": 11564 + }, + { + "epoch": 0.7817358388535893, + "grad_norm": 0.31933465600013733, + "learning_rate": 3.629450575830723e-06, + "loss": 0.05806732177734375, + "step": 11565 + }, + { + "epoch": 0.7818034338245234, + "grad_norm": 0.9955199956893921, + "learning_rate": 3.6272988578271827e-06, + "loss": 0.1687774658203125, + "step": 11566 + }, + { + "epoch": 0.7818710287954577, + "grad_norm": 0.5397214293479919, + "learning_rate": 3.625147690105654e-06, + "loss": 0.083587646484375, + "step": 11567 + }, + { + "epoch": 0.7819386237663918, + "grad_norm": 1.0629866123199463, + "learning_rate": 3.6229970727702204e-06, + "loss": 0.180908203125, + "step": 11568 + }, + { + "epoch": 0.7820062187373259, + "grad_norm": 0.9280948042869568, + "learning_rate": 3.6208470059249404e-06, + "loss": 0.1195068359375, + "step": 11569 + }, + { + "epoch": 0.7820738137082601, + "grad_norm": 0.2572905719280243, + "learning_rate": 3.618697489673849e-06, + "loss": 0.024600982666015625, + "step": 11570 + }, + { + "epoch": 0.7821414086791942, + "grad_norm": 0.16508644819259644, + "learning_rate": 3.6165485241209528e-06, + "loss": 0.02254009246826172, + "step": 11571 + }, + { + "epoch": 0.7822090036501285, + "grad_norm": 0.826690673828125, + "learning_rate": 3.614400109370232e-06, + "loss": 0.186187744140625, + "step": 11572 + }, + { + "epoch": 0.7822765986210626, + "grad_norm": 1.7125605344772339, + "learning_rate": 3.6122522455256407e-06, + "loss": 0.18558120727539062, + "step": 11573 + }, + { + "epoch": 0.7823441935919968, + "grad_norm": 0.5187171697616577, + "learning_rate": 3.6101049326911036e-06, + "loss": 0.090118408203125, + "step": 11574 + }, + { + "epoch": 0.7824117885629309, + "grad_norm": 0.3783629238605499, + "learning_rate": 3.607958170970519e-06, + "loss": 0.0796051025390625, + "step": 11575 + }, + { + "epoch": 0.782479383533865, + "grad_norm": 0.4003176987171173, + "learning_rate": 3.605811960467774e-06, + "loss": 0.050067901611328125, + "step": 11576 + }, + { + "epoch": 0.7825469785047993, + "grad_norm": 0.6965991258621216, + "learning_rate": 3.6036663012866977e-06, + "loss": 0.1574554443359375, + "step": 11577 + }, + { + "epoch": 0.7826145734757334, + "grad_norm": 1.2042205333709717, + "learning_rate": 3.6015211935311264e-06, + "loss": 0.18701171875, + "step": 11578 + }, + { + "epoch": 0.7826821684466676, + "grad_norm": 1.5811675786972046, + "learning_rate": 3.5993766373048375e-06, + "loss": 0.23980712890625, + "step": 11579 + }, + { + "epoch": 0.7827497634176017, + "grad_norm": 0.5605099201202393, + "learning_rate": 3.5972326327116113e-06, + "loss": 0.09317779541015625, + "step": 11580 + }, + { + "epoch": 0.7828173583885359, + "grad_norm": 1.0522221326828003, + "learning_rate": 3.5950891798551877e-06, + "loss": 0.1841583251953125, + "step": 11581 + }, + { + "epoch": 0.7828849533594701, + "grad_norm": 0.3967849016189575, + "learning_rate": 3.5929462788392677e-06, + "loss": 0.07852935791015625, + "step": 11582 + }, + { + "epoch": 0.7829525483304042, + "grad_norm": 0.14806222915649414, + "learning_rate": 3.5908039297675492e-06, + "loss": 0.014332771301269531, + "step": 11583 + }, + { + "epoch": 0.7830201433013384, + "grad_norm": 0.6774393320083618, + "learning_rate": 3.588662132743692e-06, + "loss": 0.09279632568359375, + "step": 11584 + }, + { + "epoch": 0.7830877382722725, + "grad_norm": 0.6095924973487854, + "learning_rate": 3.586520887871326e-06, + "loss": 0.1190032958984375, + "step": 11585 + }, + { + "epoch": 0.7831553332432067, + "grad_norm": 0.8707983493804932, + "learning_rate": 3.584380195254058e-06, + "loss": 0.1259307861328125, + "step": 11586 + }, + { + "epoch": 0.7832229282141409, + "grad_norm": 0.4231036901473999, + "learning_rate": 3.5822400549954704e-06, + "loss": 0.06793212890625, + "step": 11587 + }, + { + "epoch": 0.7832905231850751, + "grad_norm": 0.8608715534210205, + "learning_rate": 3.5801004671991107e-06, + "loss": 0.15411376953125, + "step": 11588 + }, + { + "epoch": 0.7833581181560092, + "grad_norm": 0.6573739647865295, + "learning_rate": 3.577961431968519e-06, + "loss": 0.1165924072265625, + "step": 11589 + }, + { + "epoch": 0.7834257131269433, + "grad_norm": 0.32643356919288635, + "learning_rate": 3.5758229494071775e-06, + "loss": 0.05672454833984375, + "step": 11590 + }, + { + "epoch": 0.7834933080978775, + "grad_norm": 0.2951398491859436, + "learning_rate": 3.5736850196185705e-06, + "loss": 0.046215057373046875, + "step": 11591 + }, + { + "epoch": 0.7835609030688117, + "grad_norm": 0.944405734539032, + "learning_rate": 3.5715476427061433e-06, + "loss": 0.110107421875, + "step": 11592 + }, + { + "epoch": 0.7836284980397459, + "grad_norm": 1.1545828580856323, + "learning_rate": 3.5694108187733133e-06, + "loss": 0.15724945068359375, + "step": 11593 + }, + { + "epoch": 0.78369609301068, + "grad_norm": 0.48869919776916504, + "learning_rate": 3.567274547923479e-06, + "loss": 0.08542633056640625, + "step": 11594 + }, + { + "epoch": 0.7837636879816142, + "grad_norm": 0.4837450683116913, + "learning_rate": 3.5651388302599934e-06, + "loss": 0.089752197265625, + "step": 11595 + }, + { + "epoch": 0.7838312829525483, + "grad_norm": 0.15040774643421173, + "learning_rate": 3.563003665886207e-06, + "loss": 0.022825241088867188, + "step": 11596 + }, + { + "epoch": 0.7838988779234825, + "grad_norm": 1.1248574256896973, + "learning_rate": 3.5608690549054285e-06, + "loss": 0.1536712646484375, + "step": 11597 + }, + { + "epoch": 0.7839664728944167, + "grad_norm": 0.7021012306213379, + "learning_rate": 3.5587349974209454e-06, + "loss": 0.14440155029296875, + "step": 11598 + }, + { + "epoch": 0.7840340678653508, + "grad_norm": 1.9150835275650024, + "learning_rate": 3.5566014935360153e-06, + "loss": 0.1832275390625, + "step": 11599 + }, + { + "epoch": 0.784101662836285, + "grad_norm": 1.113215446472168, + "learning_rate": 3.5544685433538704e-06, + "loss": 0.1395721435546875, + "step": 11600 + }, + { + "epoch": 0.7841692578072191, + "grad_norm": 0.4901430010795593, + "learning_rate": 3.552336146977713e-06, + "loss": 0.07901763916015625, + "step": 11601 + }, + { + "epoch": 0.7842368527781534, + "grad_norm": 1.11262845993042, + "learning_rate": 3.5502043045107328e-06, + "loss": 0.118011474609375, + "step": 11602 + }, + { + "epoch": 0.7843044477490875, + "grad_norm": 1.0202614068984985, + "learning_rate": 3.5480730160560654e-06, + "loss": 0.10993194580078125, + "step": 11603 + }, + { + "epoch": 0.7843720427200216, + "grad_norm": 0.39061692357063293, + "learning_rate": 3.545942281716848e-06, + "loss": 0.053821563720703125, + "step": 11604 + }, + { + "epoch": 0.7844396376909558, + "grad_norm": 0.4995240569114685, + "learning_rate": 3.54381210159618e-06, + "loss": 0.07529449462890625, + "step": 11605 + }, + { + "epoch": 0.7845072326618899, + "grad_norm": 0.535030722618103, + "learning_rate": 3.5416824757971206e-06, + "loss": 0.09043121337890625, + "step": 11606 + }, + { + "epoch": 0.7845748276328242, + "grad_norm": 1.0025601387023926, + "learning_rate": 3.5395534044227274e-06, + "loss": 0.141693115234375, + "step": 11607 + }, + { + "epoch": 0.7846424226037583, + "grad_norm": 0.6547766923904419, + "learning_rate": 3.537424887576006e-06, + "loss": 0.10565948486328125, + "step": 11608 + }, + { + "epoch": 0.7847100175746925, + "grad_norm": 0.43834659457206726, + "learning_rate": 3.5352969253599564e-06, + "loss": 0.0869598388671875, + "step": 11609 + }, + { + "epoch": 0.7847776125456266, + "grad_norm": 0.5356652736663818, + "learning_rate": 3.5331695178775407e-06, + "loss": 0.11473846435546875, + "step": 11610 + }, + { + "epoch": 0.7848452075165607, + "grad_norm": 0.1914171725511551, + "learning_rate": 3.5310426652316934e-06, + "loss": 0.026000022888183594, + "step": 11611 + }, + { + "epoch": 0.784912802487495, + "grad_norm": 1.482559084892273, + "learning_rate": 3.528916367525329e-06, + "loss": 0.1973114013671875, + "step": 11612 + }, + { + "epoch": 0.7849803974584291, + "grad_norm": 1.032850742340088, + "learning_rate": 3.5267906248613263e-06, + "loss": 0.116546630859375, + "step": 11613 + }, + { + "epoch": 0.7850479924293633, + "grad_norm": 1.0514851808547974, + "learning_rate": 3.524665437342542e-06, + "loss": 0.14141082763671875, + "step": 11614 + }, + { + "epoch": 0.7851155874002974, + "grad_norm": 0.5957800149917603, + "learning_rate": 3.522540805071817e-06, + "loss": 0.09877395629882812, + "step": 11615 + }, + { + "epoch": 0.7851831823712315, + "grad_norm": 0.9579742550849915, + "learning_rate": 3.520416728151935e-06, + "loss": 0.142791748046875, + "step": 11616 + }, + { + "epoch": 0.7852507773421658, + "grad_norm": 0.6602728962898254, + "learning_rate": 3.5182932066856867e-06, + "loss": 0.09655380249023438, + "step": 11617 + }, + { + "epoch": 0.7853183723130999, + "grad_norm": 1.1110788583755493, + "learning_rate": 3.5161702407758224e-06, + "loss": 0.10602951049804688, + "step": 11618 + }, + { + "epoch": 0.7853859672840341, + "grad_norm": 0.7508178353309631, + "learning_rate": 3.51404783052505e-06, + "loss": 0.11248779296875, + "step": 11619 + }, + { + "epoch": 0.7854535622549682, + "grad_norm": 1.1468439102172852, + "learning_rate": 3.5119259760360794e-06, + "loss": 0.203643798828125, + "step": 11620 + }, + { + "epoch": 0.7855211572259024, + "grad_norm": 1.0835559368133545, + "learning_rate": 3.509804677411572e-06, + "loss": 0.11396026611328125, + "step": 11621 + }, + { + "epoch": 0.7855887521968365, + "grad_norm": 0.9983596801757812, + "learning_rate": 3.507683934754173e-06, + "loss": 0.166900634765625, + "step": 11622 + }, + { + "epoch": 0.7856563471677707, + "grad_norm": 0.7732254862785339, + "learning_rate": 3.5055637481664925e-06, + "loss": 0.09320831298828125, + "step": 11623 + }, + { + "epoch": 0.7857239421387049, + "grad_norm": 0.7778723239898682, + "learning_rate": 3.503444117751122e-06, + "loss": 0.09964752197265625, + "step": 11624 + }, + { + "epoch": 0.785791537109639, + "grad_norm": 0.7108729481697083, + "learning_rate": 3.5013250436106232e-06, + "loss": 0.1444091796875, + "step": 11625 + }, + { + "epoch": 0.7858591320805732, + "grad_norm": 0.5478013753890991, + "learning_rate": 3.4992065258475265e-06, + "loss": 0.08968353271484375, + "step": 11626 + }, + { + "epoch": 0.7859267270515073, + "grad_norm": 0.7991319894790649, + "learning_rate": 3.497088564564337e-06, + "loss": 0.136688232421875, + "step": 11627 + }, + { + "epoch": 0.7859943220224416, + "grad_norm": 0.18265622854232788, + "learning_rate": 3.4949711598635476e-06, + "loss": 0.029798507690429688, + "step": 11628 + }, + { + "epoch": 0.7860619169933757, + "grad_norm": 0.29587164521217346, + "learning_rate": 3.4928543118475962e-06, + "loss": 0.032482147216796875, + "step": 11629 + }, + { + "epoch": 0.7861295119643098, + "grad_norm": 0.512596845626831, + "learning_rate": 3.490738020618913e-06, + "loss": 0.1031646728515625, + "step": 11630 + }, + { + "epoch": 0.786197106935244, + "grad_norm": 1.1763324737548828, + "learning_rate": 3.4886222862799076e-06, + "loss": 0.12445068359375, + "step": 11631 + }, + { + "epoch": 0.7862647019061781, + "grad_norm": 0.38623884320259094, + "learning_rate": 3.4865071089329364e-06, + "loss": 0.06693267822265625, + "step": 11632 + }, + { + "epoch": 0.7863322968771124, + "grad_norm": 0.6331127285957336, + "learning_rate": 3.4843924886803575e-06, + "loss": 0.098876953125, + "step": 11633 + }, + { + "epoch": 0.7863998918480465, + "grad_norm": 0.8134248852729797, + "learning_rate": 3.482278425624484e-06, + "loss": 0.1151275634765625, + "step": 11634 + }, + { + "epoch": 0.7864674868189807, + "grad_norm": 0.6171020269393921, + "learning_rate": 3.480164919867607e-06, + "loss": 0.1268157958984375, + "step": 11635 + }, + { + "epoch": 0.7865350817899148, + "grad_norm": 1.14998459815979, + "learning_rate": 3.478051971511994e-06, + "loss": 0.1317138671875, + "step": 11636 + }, + { + "epoch": 0.7866026767608489, + "grad_norm": 0.40788355469703674, + "learning_rate": 3.475939580659881e-06, + "loss": 0.07604217529296875, + "step": 11637 + }, + { + "epoch": 0.7866702717317832, + "grad_norm": 0.8737806081771851, + "learning_rate": 3.4738277474134745e-06, + "loss": 0.121337890625, + "step": 11638 + }, + { + "epoch": 0.7867378667027173, + "grad_norm": 0.5133650898933411, + "learning_rate": 3.4717164718749692e-06, + "loss": 0.09234619140625, + "step": 11639 + }, + { + "epoch": 0.7868054616736515, + "grad_norm": 0.9224525094032288, + "learning_rate": 3.469605754146511e-06, + "loss": 0.14495849609375, + "step": 11640 + }, + { + "epoch": 0.7868730566445856, + "grad_norm": 1.4138144254684448, + "learning_rate": 3.467495594330232e-06, + "loss": 0.1827545166015625, + "step": 11641 + }, + { + "epoch": 0.7869406516155198, + "grad_norm": 1.0765306949615479, + "learning_rate": 3.4653859925282384e-06, + "loss": 0.15814208984375, + "step": 11642 + }, + { + "epoch": 0.787008246586454, + "grad_norm": 0.20067007839679718, + "learning_rate": 3.4632769488425976e-06, + "loss": 0.027402877807617188, + "step": 11643 + }, + { + "epoch": 0.7870758415573881, + "grad_norm": 1.6693685054779053, + "learning_rate": 3.4611684633753733e-06, + "loss": 0.239959716796875, + "step": 11644 + }, + { + "epoch": 0.7871434365283223, + "grad_norm": 0.4159151315689087, + "learning_rate": 3.459060536228569e-06, + "loss": 0.07440185546875, + "step": 11645 + }, + { + "epoch": 0.7872110314992564, + "grad_norm": 0.4163703918457031, + "learning_rate": 3.4569531675041927e-06, + "loss": 0.06899261474609375, + "step": 11646 + }, + { + "epoch": 0.7872786264701906, + "grad_norm": 0.7737632393836975, + "learning_rate": 3.4548463573042067e-06, + "loss": 0.1335601806640625, + "step": 11647 + }, + { + "epoch": 0.7873462214411248, + "grad_norm": 0.9743947386741638, + "learning_rate": 3.452740105730554e-06, + "loss": 0.0818328857421875, + "step": 11648 + }, + { + "epoch": 0.787413816412059, + "grad_norm": 1.0258944034576416, + "learning_rate": 3.450634412885145e-06, + "loss": 0.15386962890625, + "step": 11649 + }, + { + "epoch": 0.7874814113829931, + "grad_norm": 0.4877266585826874, + "learning_rate": 3.4485292788698704e-06, + "loss": 0.090118408203125, + "step": 11650 + }, + { + "epoch": 0.7875490063539272, + "grad_norm": 0.7471238374710083, + "learning_rate": 3.4464247037865807e-06, + "loss": 0.1089324951171875, + "step": 11651 + }, + { + "epoch": 0.7876166013248614, + "grad_norm": 0.44588860869407654, + "learning_rate": 3.444320687737124e-06, + "loss": 0.08582305908203125, + "step": 11652 + }, + { + "epoch": 0.7876841962957956, + "grad_norm": 1.3044384717941284, + "learning_rate": 3.4422172308232918e-06, + "loss": 0.195404052734375, + "step": 11653 + }, + { + "epoch": 0.7877517912667298, + "grad_norm": 0.47230443358421326, + "learning_rate": 3.4401143331468676e-06, + "loss": 0.08888626098632812, + "step": 11654 + }, + { + "epoch": 0.7878193862376639, + "grad_norm": 0.35080838203430176, + "learning_rate": 3.4380119948096018e-06, + "loss": 0.056182861328125, + "step": 11655 + }, + { + "epoch": 0.7878869812085981, + "grad_norm": 1.5962607860565186, + "learning_rate": 3.4359102159132145e-06, + "loss": 0.1337738037109375, + "step": 11656 + }, + { + "epoch": 0.7879545761795322, + "grad_norm": 0.9810585975646973, + "learning_rate": 3.4338089965594162e-06, + "loss": 0.13729476928710938, + "step": 11657 + }, + { + "epoch": 0.7880221711504664, + "grad_norm": 0.5756601095199585, + "learning_rate": 3.4317083368498585e-06, + "loss": 0.09739303588867188, + "step": 11658 + }, + { + "epoch": 0.7880897661214006, + "grad_norm": 0.7684158086776733, + "learning_rate": 3.4296082368861993e-06, + "loss": 0.10213088989257812, + "step": 11659 + }, + { + "epoch": 0.7881573610923347, + "grad_norm": 0.41095924377441406, + "learning_rate": 3.4275086967700482e-06, + "loss": 0.09050750732421875, + "step": 11660 + }, + { + "epoch": 0.7882249560632689, + "grad_norm": 0.3747859299182892, + "learning_rate": 3.425409716602996e-06, + "loss": 0.04312896728515625, + "step": 11661 + }, + { + "epoch": 0.788292551034203, + "grad_norm": 1.295719861984253, + "learning_rate": 3.4233112964866024e-06, + "loss": 0.16690826416015625, + "step": 11662 + }, + { + "epoch": 0.7883601460051373, + "grad_norm": 0.28395113348960876, + "learning_rate": 3.4212134365224025e-06, + "loss": 0.05518341064453125, + "step": 11663 + }, + { + "epoch": 0.7884277409760714, + "grad_norm": 0.3451017737388611, + "learning_rate": 3.4191161368119057e-06, + "loss": 0.05596923828125, + "step": 11664 + }, + { + "epoch": 0.7884953359470055, + "grad_norm": 0.49943283200263977, + "learning_rate": 3.4170193974565893e-06, + "loss": 0.0852203369140625, + "step": 11665 + }, + { + "epoch": 0.7885629309179397, + "grad_norm": 0.2558560371398926, + "learning_rate": 3.4149232185579095e-06, + "loss": 0.04338836669921875, + "step": 11666 + }, + { + "epoch": 0.7886305258888738, + "grad_norm": 1.1472643613815308, + "learning_rate": 3.412827600217286e-06, + "loss": 0.22271728515625, + "step": 11667 + }, + { + "epoch": 0.7886981208598081, + "grad_norm": 0.2501743733882904, + "learning_rate": 3.410732542536133e-06, + "loss": 0.0307159423828125, + "step": 11668 + }, + { + "epoch": 0.7887657158307422, + "grad_norm": 0.39300888776779175, + "learning_rate": 3.408638045615803e-06, + "loss": 0.085723876953125, + "step": 11669 + }, + { + "epoch": 0.7888333108016764, + "grad_norm": 0.5366352796554565, + "learning_rate": 3.406544109557658e-06, + "loss": 0.105712890625, + "step": 11670 + }, + { + "epoch": 0.7889009057726105, + "grad_norm": 1.6278926134109497, + "learning_rate": 3.4044507344630017e-06, + "loss": 0.16239166259765625, + "step": 11671 + }, + { + "epoch": 0.7889685007435446, + "grad_norm": 0.7132269740104675, + "learning_rate": 3.4023579204331333e-06, + "loss": 0.1359710693359375, + "step": 11672 + }, + { + "epoch": 0.7890360957144789, + "grad_norm": 0.773537814617157, + "learning_rate": 3.400265667569314e-06, + "loss": 0.14703369140625, + "step": 11673 + }, + { + "epoch": 0.789103690685413, + "grad_norm": 0.5003921985626221, + "learning_rate": 3.398173975972779e-06, + "loss": 0.1143951416015625, + "step": 11674 + }, + { + "epoch": 0.7891712856563472, + "grad_norm": 0.21474038064479828, + "learning_rate": 3.3960828457447384e-06, + "loss": 0.026382923126220703, + "step": 11675 + }, + { + "epoch": 0.7892388806272813, + "grad_norm": 0.6470696926116943, + "learning_rate": 3.393992276986374e-06, + "loss": 0.0991668701171875, + "step": 11676 + }, + { + "epoch": 0.7893064755982155, + "grad_norm": 0.6660904288291931, + "learning_rate": 3.391902269798841e-06, + "loss": 0.1379241943359375, + "step": 11677 + }, + { + "epoch": 0.7893740705691497, + "grad_norm": 0.42080387473106384, + "learning_rate": 3.3898128242832653e-06, + "loss": 0.07801055908203125, + "step": 11678 + }, + { + "epoch": 0.7894416655400838, + "grad_norm": 0.7980103492736816, + "learning_rate": 3.3877239405407477e-06, + "loss": 0.1630401611328125, + "step": 11679 + }, + { + "epoch": 0.789509260511018, + "grad_norm": 0.5945611000061035, + "learning_rate": 3.385635618672359e-06, + "loss": 0.1177520751953125, + "step": 11680 + }, + { + "epoch": 0.7895768554819521, + "grad_norm": 0.9879537224769592, + "learning_rate": 3.383547858779155e-06, + "loss": 0.1328887939453125, + "step": 11681 + }, + { + "epoch": 0.7896444504528863, + "grad_norm": 0.8241769075393677, + "learning_rate": 3.3814606609621402e-06, + "loss": 0.1264495849609375, + "step": 11682 + }, + { + "epoch": 0.7897120454238205, + "grad_norm": 0.8448312878608704, + "learning_rate": 3.3793740253223167e-06, + "loss": 0.16265869140625, + "step": 11683 + }, + { + "epoch": 0.7897796403947547, + "grad_norm": 0.8837867379188538, + "learning_rate": 3.377287951960647e-06, + "loss": 0.157562255859375, + "step": 11684 + }, + { + "epoch": 0.7898472353656888, + "grad_norm": 1.4804564714431763, + "learning_rate": 3.375202440978066e-06, + "loss": 0.1925048828125, + "step": 11685 + }, + { + "epoch": 0.7899148303366229, + "grad_norm": 0.43067309260368347, + "learning_rate": 3.373117492475485e-06, + "loss": 0.06441497802734375, + "step": 11686 + }, + { + "epoch": 0.7899824253075571, + "grad_norm": 1.4010560512542725, + "learning_rate": 3.3710331065537868e-06, + "loss": 0.16204833984375, + "step": 11687 + }, + { + "epoch": 0.7900500202784912, + "grad_norm": 0.9229772686958313, + "learning_rate": 3.3689492833138263e-06, + "loss": 0.1577606201171875, + "step": 11688 + }, + { + "epoch": 0.7901176152494255, + "grad_norm": 0.8715419769287109, + "learning_rate": 3.366866022856433e-06, + "loss": 0.11884689331054688, + "step": 11689 + }, + { + "epoch": 0.7901852102203596, + "grad_norm": 0.5285065770149231, + "learning_rate": 3.3647833252824062e-06, + "loss": 0.114105224609375, + "step": 11690 + }, + { + "epoch": 0.7902528051912938, + "grad_norm": 0.26443561911582947, + "learning_rate": 3.3627011906925205e-06, + "loss": 0.05144500732421875, + "step": 11691 + }, + { + "epoch": 0.7903204001622279, + "grad_norm": 0.4122135043144226, + "learning_rate": 3.360619619187523e-06, + "loss": 0.052947998046875, + "step": 11692 + }, + { + "epoch": 0.790387995133162, + "grad_norm": 0.8593537211418152, + "learning_rate": 3.358538610868131e-06, + "loss": 0.0836639404296875, + "step": 11693 + }, + { + "epoch": 0.7904555901040963, + "grad_norm": 0.9728485941886902, + "learning_rate": 3.3564581658350446e-06, + "loss": 0.12709808349609375, + "step": 11694 + }, + { + "epoch": 0.7905231850750304, + "grad_norm": 0.9769455194473267, + "learning_rate": 3.3543782841889138e-06, + "loss": 0.141876220703125, + "step": 11695 + }, + { + "epoch": 0.7905907800459646, + "grad_norm": 0.7940779328346252, + "learning_rate": 3.352298966030391e-06, + "loss": 0.08042144775390625, + "step": 11696 + }, + { + "epoch": 0.7906583750168987, + "grad_norm": 0.20940300822257996, + "learning_rate": 3.3502202114600794e-06, + "loss": 0.032265663146972656, + "step": 11697 + }, + { + "epoch": 0.790725969987833, + "grad_norm": 1.122137427330017, + "learning_rate": 3.3481420205785618e-06, + "loss": 0.11136627197265625, + "step": 11698 + }, + { + "epoch": 0.7907935649587671, + "grad_norm": 0.172761470079422, + "learning_rate": 3.346064393486401e-06, + "loss": 0.027378082275390625, + "step": 11699 + }, + { + "epoch": 0.7908611599297012, + "grad_norm": 0.30013418197631836, + "learning_rate": 3.343987330284112e-06, + "loss": 0.0602874755859375, + "step": 11700 + }, + { + "epoch": 0.7909287549006354, + "grad_norm": 0.42726221680641174, + "learning_rate": 3.3419108310722067e-06, + "loss": 0.0570068359375, + "step": 11701 + }, + { + "epoch": 0.7909963498715695, + "grad_norm": 0.30901971459388733, + "learning_rate": 3.3398348959511577e-06, + "loss": 0.060214996337890625, + "step": 11702 + }, + { + "epoch": 0.7910639448425038, + "grad_norm": 0.9256018400192261, + "learning_rate": 3.3377595250214105e-06, + "loss": 0.151611328125, + "step": 11703 + }, + { + "epoch": 0.7911315398134379, + "grad_norm": 0.15731239318847656, + "learning_rate": 3.335684718383383e-06, + "loss": 0.01636505126953125, + "step": 11704 + }, + { + "epoch": 0.7911991347843721, + "grad_norm": 1.0068196058273315, + "learning_rate": 3.3336104761374692e-06, + "loss": 0.12725448608398438, + "step": 11705 + }, + { + "epoch": 0.7912667297553062, + "grad_norm": 0.5529266595840454, + "learning_rate": 3.331536798384028e-06, + "loss": 0.11052703857421875, + "step": 11706 + }, + { + "epoch": 0.7913343247262403, + "grad_norm": 0.7255222201347351, + "learning_rate": 3.329463685223411e-06, + "loss": 0.09322357177734375, + "step": 11707 + }, + { + "epoch": 0.7914019196971745, + "grad_norm": 0.7995158433914185, + "learning_rate": 3.327391136755909e-06, + "loss": 0.1370391845703125, + "step": 11708 + }, + { + "epoch": 0.7914695146681087, + "grad_norm": 0.2552616596221924, + "learning_rate": 3.325319153081821e-06, + "loss": 0.045253753662109375, + "step": 11709 + }, + { + "epoch": 0.7915371096390429, + "grad_norm": 0.29611366987228394, + "learning_rate": 3.3232477343014e-06, + "loss": 0.05444145202636719, + "step": 11710 + }, + { + "epoch": 0.791604704609977, + "grad_norm": 1.2384518384933472, + "learning_rate": 3.321176880514864e-06, + "loss": 0.10172462463378906, + "step": 11711 + }, + { + "epoch": 0.7916722995809112, + "grad_norm": 0.8672864437103271, + "learning_rate": 3.319106591822423e-06, + "loss": 0.10845565795898438, + "step": 11712 + }, + { + "epoch": 0.7917398945518453, + "grad_norm": 1.2887598276138306, + "learning_rate": 3.3170368683242464e-06, + "loss": 0.210205078125, + "step": 11713 + }, + { + "epoch": 0.7918074895227795, + "grad_norm": 1.537129282951355, + "learning_rate": 3.3149677101204845e-06, + "loss": 0.18438720703125, + "step": 11714 + }, + { + "epoch": 0.7918750844937137, + "grad_norm": 0.3151644170284271, + "learning_rate": 3.3128991173112505e-06, + "loss": 0.0449066162109375, + "step": 11715 + }, + { + "epoch": 0.7919426794646478, + "grad_norm": 0.38283899426460266, + "learning_rate": 3.310831089996641e-06, + "loss": 0.047637939453125, + "step": 11716 + }, + { + "epoch": 0.792010274435582, + "grad_norm": 0.8186922669410706, + "learning_rate": 3.3087636282767157e-06, + "loss": 0.1652069091796875, + "step": 11717 + }, + { + "epoch": 0.7920778694065161, + "grad_norm": 0.5452049970626831, + "learning_rate": 3.3066967322515133e-06, + "loss": 0.08225250244140625, + "step": 11718 + }, + { + "epoch": 0.7921454643774504, + "grad_norm": 0.35099029541015625, + "learning_rate": 3.3046304020210387e-06, + "loss": 0.06603240966796875, + "step": 11719 + }, + { + "epoch": 0.7922130593483845, + "grad_norm": 0.3534395694732666, + "learning_rate": 3.302564637685287e-06, + "loss": 0.03314208984375, + "step": 11720 + }, + { + "epoch": 0.7922806543193186, + "grad_norm": 0.8003214597702026, + "learning_rate": 3.3004994393441963e-06, + "loss": 0.09590387344360352, + "step": 11721 + }, + { + "epoch": 0.7923482492902528, + "grad_norm": 0.9827211499214172, + "learning_rate": 3.2984348070977033e-06, + "loss": 0.210174560546875, + "step": 11722 + }, + { + "epoch": 0.7924158442611869, + "grad_norm": 0.5214802026748657, + "learning_rate": 3.29637074104571e-06, + "loss": 0.08095550537109375, + "step": 11723 + }, + { + "epoch": 0.7924834392321212, + "grad_norm": 1.021498441696167, + "learning_rate": 3.2943072412880766e-06, + "loss": 0.2039794921875, + "step": 11724 + }, + { + "epoch": 0.7925510342030553, + "grad_norm": 0.5080810189247131, + "learning_rate": 3.2922443079246613e-06, + "loss": 0.09994125366210938, + "step": 11725 + }, + { + "epoch": 0.7926186291739895, + "grad_norm": 0.8062461614608765, + "learning_rate": 3.2901819410552743e-06, + "loss": 0.1055755615234375, + "step": 11726 + }, + { + "epoch": 0.7926862241449236, + "grad_norm": 1.4694550037384033, + "learning_rate": 3.2881201407797075e-06, + "loss": 0.197967529296875, + "step": 11727 + }, + { + "epoch": 0.7927538191158577, + "grad_norm": 0.9620822668075562, + "learning_rate": 3.2860589071977245e-06, + "loss": 0.11737823486328125, + "step": 11728 + }, + { + "epoch": 0.792821414086792, + "grad_norm": 0.4316960573196411, + "learning_rate": 3.2839982404090587e-06, + "loss": 0.055081844329833984, + "step": 11729 + }, + { + "epoch": 0.7928890090577261, + "grad_norm": 0.5813807249069214, + "learning_rate": 3.281938140513417e-06, + "loss": 0.0939788818359375, + "step": 11730 + }, + { + "epoch": 0.7929566040286603, + "grad_norm": 0.21799305081367493, + "learning_rate": 3.279878607610488e-06, + "loss": 0.0185089111328125, + "step": 11731 + }, + { + "epoch": 0.7930241989995944, + "grad_norm": 0.4806939959526062, + "learning_rate": 3.2778196417999106e-06, + "loss": 0.10424041748046875, + "step": 11732 + }, + { + "epoch": 0.7930917939705286, + "grad_norm": 0.8370825052261353, + "learning_rate": 3.2757612431813283e-06, + "loss": 0.1458740234375, + "step": 11733 + }, + { + "epoch": 0.7931593889414628, + "grad_norm": 0.45089685916900635, + "learning_rate": 3.273703411854323e-06, + "loss": 0.08083724975585938, + "step": 11734 + }, + { + "epoch": 0.7932269839123969, + "grad_norm": 0.5919736623764038, + "learning_rate": 3.27164614791847e-06, + "loss": 0.10863494873046875, + "step": 11735 + }, + { + "epoch": 0.7932945788833311, + "grad_norm": 0.20978984236717224, + "learning_rate": 3.26958945147332e-06, + "loss": 0.03339862823486328, + "step": 11736 + }, + { + "epoch": 0.7933621738542652, + "grad_norm": 1.1169750690460205, + "learning_rate": 3.2675333226183764e-06, + "loss": 0.197235107421875, + "step": 11737 + }, + { + "epoch": 0.7934297688251994, + "grad_norm": 1.3699703216552734, + "learning_rate": 3.2654777614531377e-06, + "loss": 0.167083740234375, + "step": 11738 + }, + { + "epoch": 0.7934973637961336, + "grad_norm": 0.245853453874588, + "learning_rate": 3.2634227680770618e-06, + "loss": 0.04286956787109375, + "step": 11739 + }, + { + "epoch": 0.7935649587670678, + "grad_norm": 0.666362464427948, + "learning_rate": 3.26136834258958e-06, + "loss": 0.121490478515625, + "step": 11740 + }, + { + "epoch": 0.7936325537380019, + "grad_norm": 1.4280756711959839, + "learning_rate": 3.2593144850901004e-06, + "loss": 0.257354736328125, + "step": 11741 + }, + { + "epoch": 0.793700148708936, + "grad_norm": 0.9579311609268188, + "learning_rate": 3.257261195678001e-06, + "loss": 0.11666107177734375, + "step": 11742 + }, + { + "epoch": 0.7937677436798702, + "grad_norm": 0.22683747112751007, + "learning_rate": 3.255208474452627e-06, + "loss": 0.04927825927734375, + "step": 11743 + }, + { + "epoch": 0.7938353386508044, + "grad_norm": 1.13379967212677, + "learning_rate": 3.2531563215133154e-06, + "loss": 0.1336822509765625, + "step": 11744 + }, + { + "epoch": 0.7939029336217386, + "grad_norm": 0.6078404784202576, + "learning_rate": 3.2511047369593507e-06, + "loss": 0.08571624755859375, + "step": 11745 + }, + { + "epoch": 0.7939705285926727, + "grad_norm": 0.7694280743598938, + "learning_rate": 3.2490537208900036e-06, + "loss": 0.15924072265625, + "step": 11746 + }, + { + "epoch": 0.7940381235636068, + "grad_norm": 0.5202966332435608, + "learning_rate": 3.247003273404516e-06, + "loss": 0.09123992919921875, + "step": 11747 + }, + { + "epoch": 0.794105718534541, + "grad_norm": 1.3064135313034058, + "learning_rate": 3.244953394602098e-06, + "loss": 0.13748931884765625, + "step": 11748 + }, + { + "epoch": 0.7941733135054752, + "grad_norm": 0.4870414733886719, + "learning_rate": 3.2429040845819446e-06, + "loss": 0.0982208251953125, + "step": 11749 + }, + { + "epoch": 0.7942409084764094, + "grad_norm": 1.0424072742462158, + "learning_rate": 3.2408553434432016e-06, + "loss": 0.180206298828125, + "step": 11750 + }, + { + "epoch": 0.7943085034473435, + "grad_norm": 1.2074017524719238, + "learning_rate": 3.238807171285011e-06, + "loss": 0.179473876953125, + "step": 11751 + }, + { + "epoch": 0.7943760984182777, + "grad_norm": 0.3958086669445038, + "learning_rate": 3.2367595682064706e-06, + "loss": 0.07686042785644531, + "step": 11752 + }, + { + "epoch": 0.7944436933892118, + "grad_norm": 0.6191377639770508, + "learning_rate": 3.2347125343066573e-06, + "loss": 0.12091827392578125, + "step": 11753 + }, + { + "epoch": 0.794511288360146, + "grad_norm": 1.156174898147583, + "learning_rate": 3.2326660696846195e-06, + "loss": 0.190887451171875, + "step": 11754 + }, + { + "epoch": 0.7945788833310802, + "grad_norm": 1.356871247291565, + "learning_rate": 3.230620174439377e-06, + "loss": 0.2158203125, + "step": 11755 + }, + { + "epoch": 0.7946464783020143, + "grad_norm": 0.3514116108417511, + "learning_rate": 3.2285748486699198e-06, + "loss": 0.047863006591796875, + "step": 11756 + }, + { + "epoch": 0.7947140732729485, + "grad_norm": 1.3752511739730835, + "learning_rate": 3.2265300924752243e-06, + "loss": 0.1932220458984375, + "step": 11757 + }, + { + "epoch": 0.7947816682438826, + "grad_norm": 0.8687957525253296, + "learning_rate": 3.2244859059542176e-06, + "loss": 0.170684814453125, + "step": 11758 + }, + { + "epoch": 0.7948492632148169, + "grad_norm": 0.9700595736503601, + "learning_rate": 3.2224422892058098e-06, + "loss": 0.209014892578125, + "step": 11759 + }, + { + "epoch": 0.794916858185751, + "grad_norm": 0.35016733407974243, + "learning_rate": 3.220399242328897e-06, + "loss": 0.0641021728515625, + "step": 11760 + }, + { + "epoch": 0.7949844531566851, + "grad_norm": 1.1766014099121094, + "learning_rate": 3.218356765422317e-06, + "loss": 0.12694549560546875, + "step": 11761 + }, + { + "epoch": 0.7950520481276193, + "grad_norm": 0.29363852739334106, + "learning_rate": 3.2163148585849134e-06, + "loss": 0.04573822021484375, + "step": 11762 + }, + { + "epoch": 0.7951196430985534, + "grad_norm": 1.2619333267211914, + "learning_rate": 3.2142735219154714e-06, + "loss": 0.201202392578125, + "step": 11763 + }, + { + "epoch": 0.7951872380694877, + "grad_norm": 0.4795505404472351, + "learning_rate": 3.2122327555127745e-06, + "loss": 0.0871124267578125, + "step": 11764 + }, + { + "epoch": 0.7952548330404218, + "grad_norm": 0.720435380935669, + "learning_rate": 3.2101925594755645e-06, + "loss": 0.120849609375, + "step": 11765 + }, + { + "epoch": 0.795322428011356, + "grad_norm": 0.3159828782081604, + "learning_rate": 3.2081529339025584e-06, + "loss": 0.059597015380859375, + "step": 11766 + }, + { + "epoch": 0.7953900229822901, + "grad_norm": 0.5433838367462158, + "learning_rate": 3.206113878892446e-06, + "loss": 0.10327911376953125, + "step": 11767 + }, + { + "epoch": 0.7954576179532242, + "grad_norm": 0.3621673882007599, + "learning_rate": 3.20407539454389e-06, + "loss": 0.0790863037109375, + "step": 11768 + }, + { + "epoch": 0.7955252129241585, + "grad_norm": 1.443395733833313, + "learning_rate": 3.2020374809555248e-06, + "loss": 0.181610107421875, + "step": 11769 + }, + { + "epoch": 0.7955928078950926, + "grad_norm": 1.7153369188308716, + "learning_rate": 3.2000001382259563e-06, + "loss": 0.15673065185546875, + "step": 11770 + }, + { + "epoch": 0.7956604028660268, + "grad_norm": 0.7773624062538147, + "learning_rate": 3.1979633664537645e-06, + "loss": 0.134063720703125, + "step": 11771 + }, + { + "epoch": 0.7957279978369609, + "grad_norm": 0.3330768644809723, + "learning_rate": 3.1959271657374984e-06, + "loss": 0.082244873046875, + "step": 11772 + }, + { + "epoch": 0.7957955928078951, + "grad_norm": 0.5446993708610535, + "learning_rate": 3.193891536175691e-06, + "loss": 0.095001220703125, + "step": 11773 + }, + { + "epoch": 0.7958631877788293, + "grad_norm": 0.24217352271080017, + "learning_rate": 3.191856477866825e-06, + "loss": 0.03899383544921875, + "step": 11774 + }, + { + "epoch": 0.7959307827497634, + "grad_norm": 1.5069942474365234, + "learning_rate": 3.189821990909381e-06, + "loss": 0.1826171875, + "step": 11775 + }, + { + "epoch": 0.7959983777206976, + "grad_norm": 0.4270787239074707, + "learning_rate": 3.187788075401795e-06, + "loss": 0.0667572021484375, + "step": 11776 + }, + { + "epoch": 0.7960659726916317, + "grad_norm": 1.2154741287231445, + "learning_rate": 3.185754731442482e-06, + "loss": 0.12388229370117188, + "step": 11777 + }, + { + "epoch": 0.7961335676625659, + "grad_norm": 0.7707481980323792, + "learning_rate": 3.1837219591298267e-06, + "loss": 0.08583831787109375, + "step": 11778 + }, + { + "epoch": 0.7962011626335, + "grad_norm": 1.0175743103027344, + "learning_rate": 3.181689758562188e-06, + "loss": 0.1522216796875, + "step": 11779 + }, + { + "epoch": 0.7962687576044343, + "grad_norm": 0.27243709564208984, + "learning_rate": 3.1796581298378967e-06, + "loss": 0.03766441345214844, + "step": 11780 + }, + { + "epoch": 0.7963363525753684, + "grad_norm": 1.378904938697815, + "learning_rate": 3.1776270730552546e-06, + "loss": 0.147613525390625, + "step": 11781 + }, + { + "epoch": 0.7964039475463025, + "grad_norm": 0.731543242931366, + "learning_rate": 3.1755965883125366e-06, + "loss": 0.1110687255859375, + "step": 11782 + }, + { + "epoch": 0.7964715425172367, + "grad_norm": 0.6526904106140137, + "learning_rate": 3.1735666757079903e-06, + "loss": 0.0883636474609375, + "step": 11783 + }, + { + "epoch": 0.7965391374881708, + "grad_norm": 0.2960960566997528, + "learning_rate": 3.171537335339837e-06, + "loss": 0.04604339599609375, + "step": 11784 + }, + { + "epoch": 0.7966067324591051, + "grad_norm": 0.4576054811477661, + "learning_rate": 3.1695085673062636e-06, + "loss": 0.0697479248046875, + "step": 11785 + }, + { + "epoch": 0.7966743274300392, + "grad_norm": 0.32045692205429077, + "learning_rate": 3.167480371705448e-06, + "loss": 0.0594635009765625, + "step": 11786 + }, + { + "epoch": 0.7967419224009734, + "grad_norm": 0.5589773058891296, + "learning_rate": 3.165452748635508e-06, + "loss": 0.0869140625, + "step": 11787 + }, + { + "epoch": 0.7968095173719075, + "grad_norm": 0.9181641936302185, + "learning_rate": 3.1634256981945663e-06, + "loss": 0.1771240234375, + "step": 11788 + }, + { + "epoch": 0.7968771123428416, + "grad_norm": 0.6813453435897827, + "learning_rate": 3.161399220480702e-06, + "loss": 0.1443328857421875, + "step": 11789 + }, + { + "epoch": 0.7969447073137759, + "grad_norm": 0.9929502606391907, + "learning_rate": 3.1593733155919647e-06, + "loss": 0.1454010009765625, + "step": 11790 + }, + { + "epoch": 0.79701230228471, + "grad_norm": 0.8979760408401489, + "learning_rate": 3.157347983626383e-06, + "loss": 0.13824462890625, + "step": 11791 + }, + { + "epoch": 0.7970798972556442, + "grad_norm": 0.6883516311645508, + "learning_rate": 3.155323224681955e-06, + "loss": 0.07303428649902344, + "step": 11792 + }, + { + "epoch": 0.7971474922265783, + "grad_norm": 0.28244444727897644, + "learning_rate": 3.153299038856651e-06, + "loss": 0.05843353271484375, + "step": 11793 + }, + { + "epoch": 0.7972150871975126, + "grad_norm": 0.16501393914222717, + "learning_rate": 3.151275426248413e-06, + "loss": 0.024059295654296875, + "step": 11794 + }, + { + "epoch": 0.7972826821684467, + "grad_norm": 0.30913394689559937, + "learning_rate": 3.1492523869551566e-06, + "loss": 0.0536956787109375, + "step": 11795 + }, + { + "epoch": 0.7973502771393808, + "grad_norm": 0.4081430435180664, + "learning_rate": 3.1472299210747673e-06, + "loss": 0.0824737548828125, + "step": 11796 + }, + { + "epoch": 0.797417872110315, + "grad_norm": 0.841231107711792, + "learning_rate": 3.145208028705107e-06, + "loss": 0.105712890625, + "step": 11797 + }, + { + "epoch": 0.7974854670812491, + "grad_norm": 0.7894511818885803, + "learning_rate": 3.1431867099440028e-06, + "loss": 0.1532135009765625, + "step": 11798 + }, + { + "epoch": 0.7975530620521833, + "grad_norm": 0.8904423713684082, + "learning_rate": 3.1411659648892705e-06, + "loss": 0.09697723388671875, + "step": 11799 + }, + { + "epoch": 0.7976206570231175, + "grad_norm": 0.27399176359176636, + "learning_rate": 3.1391457936386693e-06, + "loss": 0.05757904052734375, + "step": 11800 + }, + { + "epoch": 0.7976882519940517, + "grad_norm": 0.7136213183403015, + "learning_rate": 3.1371261962899607e-06, + "loss": 0.1279296875, + "step": 11801 + }, + { + "epoch": 0.7977558469649858, + "grad_norm": 0.7419182062149048, + "learning_rate": 3.1351071729408597e-06, + "loss": 0.1189422607421875, + "step": 11802 + }, + { + "epoch": 0.7978234419359199, + "grad_norm": 0.3387291133403778, + "learning_rate": 3.133088723689064e-06, + "loss": 0.053009033203125, + "step": 11803 + }, + { + "epoch": 0.7978910369068541, + "grad_norm": 0.48346513509750366, + "learning_rate": 3.1310708486322327e-06, + "loss": 0.059383392333984375, + "step": 11804 + }, + { + "epoch": 0.7979586318777883, + "grad_norm": 1.1292105913162231, + "learning_rate": 3.1290535478680067e-06, + "loss": 0.176849365234375, + "step": 11805 + }, + { + "epoch": 0.7980262268487225, + "grad_norm": 0.44046491384506226, + "learning_rate": 3.127036821493994e-06, + "loss": 0.06703948974609375, + "step": 11806 + }, + { + "epoch": 0.7980938218196566, + "grad_norm": 1.0786722898483276, + "learning_rate": 3.1250206696077775e-06, + "loss": 0.175445556640625, + "step": 11807 + }, + { + "epoch": 0.7981614167905908, + "grad_norm": 0.896296501159668, + "learning_rate": 3.123005092306912e-06, + "loss": 0.1090087890625, + "step": 11808 + }, + { + "epoch": 0.7982290117615249, + "grad_norm": 0.4267156422138214, + "learning_rate": 3.120990089688921e-06, + "loss": 0.07405471801757812, + "step": 11809 + }, + { + "epoch": 0.7982966067324591, + "grad_norm": 1.1160924434661865, + "learning_rate": 3.118975661851303e-06, + "loss": 0.1499176025390625, + "step": 11810 + }, + { + "epoch": 0.7983642017033933, + "grad_norm": 0.7851077914237976, + "learning_rate": 3.1169618088915275e-06, + "loss": 0.1419830322265625, + "step": 11811 + }, + { + "epoch": 0.7984317966743274, + "grad_norm": 0.8245777487754822, + "learning_rate": 3.1149485309070474e-06, + "loss": 0.09852027893066406, + "step": 11812 + }, + { + "epoch": 0.7984993916452616, + "grad_norm": 1.0274128913879395, + "learning_rate": 3.112935827995262e-06, + "loss": 0.195709228515625, + "step": 11813 + }, + { + "epoch": 0.7985669866161957, + "grad_norm": 0.3195628821849823, + "learning_rate": 3.1109237002535696e-06, + "loss": 0.0507049560546875, + "step": 11814 + }, + { + "epoch": 0.79863458158713, + "grad_norm": 0.3207813501358032, + "learning_rate": 3.1089121477793293e-06, + "loss": 0.06464385986328125, + "step": 11815 + }, + { + "epoch": 0.7987021765580641, + "grad_norm": 0.38814660906791687, + "learning_rate": 3.106901170669863e-06, + "loss": 0.06233978271484375, + "step": 11816 + }, + { + "epoch": 0.7987697715289982, + "grad_norm": 0.7805919647216797, + "learning_rate": 3.104890769022483e-06, + "loss": 0.127227783203125, + "step": 11817 + }, + { + "epoch": 0.7988373664999324, + "grad_norm": 0.6740556955337524, + "learning_rate": 3.1028809429344617e-06, + "loss": 0.10727214813232422, + "step": 11818 + }, + { + "epoch": 0.7989049614708665, + "grad_norm": 0.5460535287857056, + "learning_rate": 3.1008716925030495e-06, + "loss": 0.10899543762207031, + "step": 11819 + }, + { + "epoch": 0.7989725564418008, + "grad_norm": 0.7132325768470764, + "learning_rate": 3.0988630178254633e-06, + "loss": 0.14569091796875, + "step": 11820 + }, + { + "epoch": 0.7990401514127349, + "grad_norm": 0.335093230009079, + "learning_rate": 3.0968549189988976e-06, + "loss": 0.06495285034179688, + "step": 11821 + }, + { + "epoch": 0.7991077463836691, + "grad_norm": 0.5917211771011353, + "learning_rate": 3.0948473961205124e-06, + "loss": 0.1005401611328125, + "step": 11822 + }, + { + "epoch": 0.7991753413546032, + "grad_norm": 1.330222487449646, + "learning_rate": 3.092840449287454e-06, + "loss": 0.180450439453125, + "step": 11823 + }, + { + "epoch": 0.7992429363255373, + "grad_norm": 0.6748215556144714, + "learning_rate": 3.0908340785968176e-06, + "loss": 0.104217529296875, + "step": 11824 + }, + { + "epoch": 0.7993105312964716, + "grad_norm": 0.8002345561981201, + "learning_rate": 3.0888282841456986e-06, + "loss": 0.1751708984375, + "step": 11825 + }, + { + "epoch": 0.7993781262674057, + "grad_norm": 0.8560336828231812, + "learning_rate": 3.086823066031134e-06, + "loss": 0.12158584594726562, + "step": 11826 + }, + { + "epoch": 0.7994457212383399, + "grad_norm": 0.36126381158828735, + "learning_rate": 3.084818424350161e-06, + "loss": 0.06298828125, + "step": 11827 + }, + { + "epoch": 0.799513316209274, + "grad_norm": 0.29860976338386536, + "learning_rate": 3.082814359199775e-06, + "loss": 0.050518035888671875, + "step": 11828 + }, + { + "epoch": 0.7995809111802082, + "grad_norm": 0.29735267162323, + "learning_rate": 3.080810870676936e-06, + "loss": 0.059452056884765625, + "step": 11829 + }, + { + "epoch": 0.7996485061511424, + "grad_norm": 0.8687483668327332, + "learning_rate": 3.0788079588785954e-06, + "loss": 0.10297584533691406, + "step": 11830 + }, + { + "epoch": 0.7997161011220765, + "grad_norm": 0.6033396124839783, + "learning_rate": 3.0768056239016624e-06, + "loss": 0.09370040893554688, + "step": 11831 + }, + { + "epoch": 0.7997836960930107, + "grad_norm": 0.35556939244270325, + "learning_rate": 3.0748038658430224e-06, + "loss": 0.06261062622070312, + "step": 11832 + }, + { + "epoch": 0.7998512910639448, + "grad_norm": 0.7839720249176025, + "learning_rate": 3.0728026847995343e-06, + "loss": 0.129364013671875, + "step": 11833 + }, + { + "epoch": 0.799918886034879, + "grad_norm": 0.6295744776725769, + "learning_rate": 3.0708020808680265e-06, + "loss": 0.11469268798828125, + "step": 11834 + }, + { + "epoch": 0.7999864810058132, + "grad_norm": 0.5070308446884155, + "learning_rate": 3.068802054145296e-06, + "loss": 0.09415435791015625, + "step": 11835 + }, + { + "epoch": 0.8000540759767474, + "grad_norm": 0.9446154236793518, + "learning_rate": 3.0668026047281304e-06, + "loss": 0.1818084716796875, + "step": 11836 + }, + { + "epoch": 0.8001216709476815, + "grad_norm": 0.46211761236190796, + "learning_rate": 3.0648037327132596e-06, + "loss": 0.0882720947265625, + "step": 11837 + }, + { + "epoch": 0.8001892659186156, + "grad_norm": 0.4543699324131012, + "learning_rate": 3.062805438197415e-06, + "loss": 0.0659637451171875, + "step": 11838 + }, + { + "epoch": 0.8002568608895498, + "grad_norm": 0.7939251661300659, + "learning_rate": 3.0608077212772757e-06, + "loss": 0.12652587890625, + "step": 11839 + }, + { + "epoch": 0.800324455860484, + "grad_norm": 0.3241584599018097, + "learning_rate": 3.0588105820495047e-06, + "loss": 0.04633331298828125, + "step": 11840 + }, + { + "epoch": 0.8003920508314182, + "grad_norm": 1.26749849319458, + "learning_rate": 3.056814020610746e-06, + "loss": 0.183319091796875, + "step": 11841 + }, + { + "epoch": 0.8004596458023523, + "grad_norm": 0.3663859963417053, + "learning_rate": 3.054818037057593e-06, + "loss": 0.0793914794921875, + "step": 11842 + }, + { + "epoch": 0.8005272407732865, + "grad_norm": 0.47760695219039917, + "learning_rate": 3.052822631486632e-06, + "loss": 0.06154918670654297, + "step": 11843 + }, + { + "epoch": 0.8005948357442206, + "grad_norm": 0.6401395797729492, + "learning_rate": 3.050827803994411e-06, + "loss": 0.07733154296875, + "step": 11844 + }, + { + "epoch": 0.8006624307151547, + "grad_norm": 0.31000134348869324, + "learning_rate": 3.0488335546774526e-06, + "loss": 0.05084228515625, + "step": 11845 + }, + { + "epoch": 0.800730025686089, + "grad_norm": 0.7924599647521973, + "learning_rate": 3.0468398836322497e-06, + "loss": 0.09978103637695312, + "step": 11846 + }, + { + "epoch": 0.8007976206570231, + "grad_norm": 0.6266353726387024, + "learning_rate": 3.0448467909552687e-06, + "loss": 0.1260833740234375, + "step": 11847 + }, + { + "epoch": 0.8008652156279573, + "grad_norm": 0.4232473373413086, + "learning_rate": 3.042854276742945e-06, + "loss": 0.059734344482421875, + "step": 11848 + }, + { + "epoch": 0.8009328105988914, + "grad_norm": 1.0600144863128662, + "learning_rate": 3.040862341091699e-06, + "loss": 0.1823577880859375, + "step": 11849 + }, + { + "epoch": 0.8010004055698257, + "grad_norm": 1.0105481147766113, + "learning_rate": 3.0388709840979035e-06, + "loss": 0.133819580078125, + "step": 11850 + }, + { + "epoch": 0.8010680005407598, + "grad_norm": 0.33958685398101807, + "learning_rate": 3.0368802058579107e-06, + "loss": 0.0639190673828125, + "step": 11851 + }, + { + "epoch": 0.8011355955116939, + "grad_norm": 0.24353928864002228, + "learning_rate": 3.0348900064680597e-06, + "loss": 0.040180206298828125, + "step": 11852 + }, + { + "epoch": 0.8012031904826281, + "grad_norm": 1.1249003410339355, + "learning_rate": 3.0329003860246323e-06, + "loss": 0.15411376953125, + "step": 11853 + }, + { + "epoch": 0.8012707854535622, + "grad_norm": 0.22351554036140442, + "learning_rate": 3.030911344623914e-06, + "loss": 0.038211822509765625, + "step": 11854 + }, + { + "epoch": 0.8013383804244965, + "grad_norm": 0.28860345482826233, + "learning_rate": 3.028922882362132e-06, + "loss": 0.0328826904296875, + "step": 11855 + }, + { + "epoch": 0.8014059753954306, + "grad_norm": 0.928654134273529, + "learning_rate": 3.026934999335512e-06, + "loss": 0.1204833984375, + "step": 11856 + }, + { + "epoch": 0.8014735703663648, + "grad_norm": 0.6130831241607666, + "learning_rate": 3.0249476956402383e-06, + "loss": 0.07721233367919922, + "step": 11857 + }, + { + "epoch": 0.8015411653372989, + "grad_norm": 0.2685927748680115, + "learning_rate": 3.0229609713724655e-06, + "loss": 0.04355621337890625, + "step": 11858 + }, + { + "epoch": 0.801608760308233, + "grad_norm": 1.69203519821167, + "learning_rate": 3.0209748266283264e-06, + "loss": 0.1990203857421875, + "step": 11859 + }, + { + "epoch": 0.8016763552791673, + "grad_norm": 0.6419505476951599, + "learning_rate": 3.01898926150392e-06, + "loss": 0.0851898193359375, + "step": 11860 + }, + { + "epoch": 0.8017439502501014, + "grad_norm": 1.3416259288787842, + "learning_rate": 3.0170042760953204e-06, + "loss": 0.214202880859375, + "step": 11861 + }, + { + "epoch": 0.8018115452210356, + "grad_norm": 0.8132742047309875, + "learning_rate": 3.0150198704985825e-06, + "loss": 0.1484375, + "step": 11862 + }, + { + "epoch": 0.8018791401919697, + "grad_norm": 0.8045405149459839, + "learning_rate": 3.013036044809713e-06, + "loss": 0.168121337890625, + "step": 11863 + }, + { + "epoch": 0.8019467351629039, + "grad_norm": 1.100737452507019, + "learning_rate": 3.011052799124702e-06, + "loss": 0.157562255859375, + "step": 11864 + }, + { + "epoch": 0.802014330133838, + "grad_norm": 0.44050151109695435, + "learning_rate": 3.009070133539523e-06, + "loss": 0.07958602905273438, + "step": 11865 + }, + { + "epoch": 0.8020819251047722, + "grad_norm": 2.0052433013916016, + "learning_rate": 3.0070880481500928e-06, + "loss": 0.1646575927734375, + "step": 11866 + }, + { + "epoch": 0.8021495200757064, + "grad_norm": 0.42355257272720337, + "learning_rate": 3.005106543052331e-06, + "loss": 0.05527782440185547, + "step": 11867 + }, + { + "epoch": 0.8022171150466405, + "grad_norm": 0.43257227540016174, + "learning_rate": 3.0031256183421107e-06, + "loss": 0.085845947265625, + "step": 11868 + }, + { + "epoch": 0.8022847100175747, + "grad_norm": 0.36474359035491943, + "learning_rate": 3.0011452741152783e-06, + "loss": 0.061492919921875, + "step": 11869 + }, + { + "epoch": 0.8023523049885088, + "grad_norm": 1.2214031219482422, + "learning_rate": 2.999165510467661e-06, + "loss": 0.173797607421875, + "step": 11870 + }, + { + "epoch": 0.8024198999594431, + "grad_norm": 0.5553992390632629, + "learning_rate": 2.9971863274950462e-06, + "loss": 0.09258270263671875, + "step": 11871 + }, + { + "epoch": 0.8024874949303772, + "grad_norm": 1.1670321226119995, + "learning_rate": 2.9952077252932014e-06, + "loss": 0.1281280517578125, + "step": 11872 + }, + { + "epoch": 0.8025550899013113, + "grad_norm": 1.055650234222412, + "learning_rate": 2.993229703957865e-06, + "loss": 0.11779022216796875, + "step": 11873 + }, + { + "epoch": 0.8026226848722455, + "grad_norm": 0.29369187355041504, + "learning_rate": 2.991252263584745e-06, + "loss": 0.0443572998046875, + "step": 11874 + }, + { + "epoch": 0.8026902798431796, + "grad_norm": 0.5178038477897644, + "learning_rate": 2.989275404269522e-06, + "loss": 0.075897216796875, + "step": 11875 + }, + { + "epoch": 0.8027578748141139, + "grad_norm": 0.37073495984077454, + "learning_rate": 2.9872991261078474e-06, + "loss": 0.0613555908203125, + "step": 11876 + }, + { + "epoch": 0.802825469785048, + "grad_norm": 1.5097630023956299, + "learning_rate": 2.985323429195345e-06, + "loss": 0.179351806640625, + "step": 11877 + }, + { + "epoch": 0.8028930647559821, + "grad_norm": 0.9905350208282471, + "learning_rate": 2.98334831362762e-06, + "loss": 0.1340179443359375, + "step": 11878 + }, + { + "epoch": 0.8029606597269163, + "grad_norm": 0.20679503679275513, + "learning_rate": 2.981373779500228e-06, + "loss": 0.03571510314941406, + "step": 11879 + }, + { + "epoch": 0.8030282546978504, + "grad_norm": 1.1565667390823364, + "learning_rate": 2.979399826908718e-06, + "loss": 0.16193389892578125, + "step": 11880 + }, + { + "epoch": 0.8030958496687847, + "grad_norm": 1.5262730121612549, + "learning_rate": 2.9774264559486015e-06, + "loss": 0.221527099609375, + "step": 11881 + }, + { + "epoch": 0.8031634446397188, + "grad_norm": 0.5484585165977478, + "learning_rate": 2.97545366671536e-06, + "loss": 0.105377197265625, + "step": 11882 + }, + { + "epoch": 0.803231039610653, + "grad_norm": 0.5314573645591736, + "learning_rate": 2.9734814593044484e-06, + "loss": 0.08392333984375, + "step": 11883 + }, + { + "epoch": 0.8032986345815871, + "grad_norm": 0.9677156805992126, + "learning_rate": 2.9715098338112977e-06, + "loss": 0.1225128173828125, + "step": 11884 + }, + { + "epoch": 0.8033662295525212, + "grad_norm": 0.8114175796508789, + "learning_rate": 2.9695387903313047e-06, + "loss": 0.1026153564453125, + "step": 11885 + }, + { + "epoch": 0.8034338245234555, + "grad_norm": 0.8263906836509705, + "learning_rate": 2.9675683289598434e-06, + "loss": 0.0971221923828125, + "step": 11886 + }, + { + "epoch": 0.8035014194943896, + "grad_norm": 0.7999785542488098, + "learning_rate": 2.9655984497922534e-06, + "loss": 0.1278839111328125, + "step": 11887 + }, + { + "epoch": 0.8035690144653238, + "grad_norm": 0.33880382776260376, + "learning_rate": 2.9636291529238505e-06, + "loss": 0.06417083740234375, + "step": 11888 + }, + { + "epoch": 0.8036366094362579, + "grad_norm": 0.5137070417404175, + "learning_rate": 2.961660438449926e-06, + "loss": 0.08883285522460938, + "step": 11889 + }, + { + "epoch": 0.8037042044071921, + "grad_norm": 0.9325622916221619, + "learning_rate": 2.959692306465729e-06, + "loss": 0.170654296875, + "step": 11890 + }, + { + "epoch": 0.8037717993781263, + "grad_norm": 0.7432513236999512, + "learning_rate": 2.957724757066504e-06, + "loss": 0.11743927001953125, + "step": 11891 + }, + { + "epoch": 0.8038393943490604, + "grad_norm": 0.844717800617218, + "learning_rate": 2.9557577903474365e-06, + "loss": 0.12148284912109375, + "step": 11892 + }, + { + "epoch": 0.8039069893199946, + "grad_norm": 1.2172894477844238, + "learning_rate": 2.9537914064037153e-06, + "loss": 0.1588592529296875, + "step": 11893 + }, + { + "epoch": 0.8039745842909287, + "grad_norm": 0.3607020080089569, + "learning_rate": 2.951825605330479e-06, + "loss": 0.055065155029296875, + "step": 11894 + }, + { + "epoch": 0.8040421792618629, + "grad_norm": 0.35200658440589905, + "learning_rate": 2.9498603872228475e-06, + "loss": 0.06909942626953125, + "step": 11895 + }, + { + "epoch": 0.8041097742327971, + "grad_norm": 0.4314676821231842, + "learning_rate": 2.94789575217591e-06, + "loss": 0.08925628662109375, + "step": 11896 + }, + { + "epoch": 0.8041773692037313, + "grad_norm": 0.342729777097702, + "learning_rate": 2.945931700284727e-06, + "loss": 0.052093505859375, + "step": 11897 + }, + { + "epoch": 0.8042449641746654, + "grad_norm": 0.7384445071220398, + "learning_rate": 2.9439682316443314e-06, + "loss": 0.1264801025390625, + "step": 11898 + }, + { + "epoch": 0.8043125591455995, + "grad_norm": 0.5415329337120056, + "learning_rate": 2.9420053463497273e-06, + "loss": 0.09976577758789062, + "step": 11899 + }, + { + "epoch": 0.8043801541165337, + "grad_norm": 0.3239278197288513, + "learning_rate": 2.940043044495894e-06, + "loss": 0.05669403076171875, + "step": 11900 + }, + { + "epoch": 0.8044477490874679, + "grad_norm": 0.6779767870903015, + "learning_rate": 2.9380813261777767e-06, + "loss": 0.092498779296875, + "step": 11901 + }, + { + "epoch": 0.8045153440584021, + "grad_norm": 0.23209814727306366, + "learning_rate": 2.9361201914902985e-06, + "loss": 0.02851104736328125, + "step": 11902 + }, + { + "epoch": 0.8045829390293362, + "grad_norm": 0.17546814680099487, + "learning_rate": 2.9341596405283465e-06, + "loss": 0.021787643432617188, + "step": 11903 + }, + { + "epoch": 0.8046505340002704, + "grad_norm": 0.3439200520515442, + "learning_rate": 2.932199673386795e-06, + "loss": 0.04914093017578125, + "step": 11904 + }, + { + "epoch": 0.8047181289712045, + "grad_norm": 0.6161302924156189, + "learning_rate": 2.930240290160463e-06, + "loss": 0.10167694091796875, + "step": 11905 + }, + { + "epoch": 0.8047857239421387, + "grad_norm": 0.43912217020988464, + "learning_rate": 2.9282814909441728e-06, + "loss": 0.055103302001953125, + "step": 11906 + }, + { + "epoch": 0.8048533189130729, + "grad_norm": 0.5425301790237427, + "learning_rate": 2.9263232758326957e-06, + "loss": 0.071441650390625, + "step": 11907 + }, + { + "epoch": 0.804920913884007, + "grad_norm": 0.7302699685096741, + "learning_rate": 2.924365644920784e-06, + "loss": 0.1341705322265625, + "step": 11908 + }, + { + "epoch": 0.8049885088549412, + "grad_norm": 1.5164613723754883, + "learning_rate": 2.92240859830316e-06, + "loss": 0.1531524658203125, + "step": 11909 + }, + { + "epoch": 0.8050561038258753, + "grad_norm": 0.8687766790390015, + "learning_rate": 2.9204521360745184e-06, + "loss": 0.14879608154296875, + "step": 11910 + }, + { + "epoch": 0.8051236987968096, + "grad_norm": 0.38863858580589294, + "learning_rate": 2.918496258329525e-06, + "loss": 0.052825927734375, + "step": 11911 + }, + { + "epoch": 0.8051912937677437, + "grad_norm": 0.8936759233474731, + "learning_rate": 2.9165409651628156e-06, + "loss": 0.170654296875, + "step": 11912 + }, + { + "epoch": 0.8052588887386778, + "grad_norm": 0.49733471870422363, + "learning_rate": 2.914586256669001e-06, + "loss": 0.1190643310546875, + "step": 11913 + }, + { + "epoch": 0.805326483709612, + "grad_norm": 0.6877506971359253, + "learning_rate": 2.9126321329426566e-06, + "loss": 0.1423492431640625, + "step": 11914 + }, + { + "epoch": 0.8053940786805461, + "grad_norm": 0.490813672542572, + "learning_rate": 2.910678594078349e-06, + "loss": 0.09686279296875, + "step": 11915 + }, + { + "epoch": 0.8054616736514804, + "grad_norm": 0.730522871017456, + "learning_rate": 2.9087256401705857e-06, + "loss": 0.12163543701171875, + "step": 11916 + }, + { + "epoch": 0.8055292686224145, + "grad_norm": 1.0982894897460938, + "learning_rate": 2.906773271313879e-06, + "loss": 0.13428115844726562, + "step": 11917 + }, + { + "epoch": 0.8055968635933487, + "grad_norm": 0.3052131235599518, + "learning_rate": 2.9048214876026807e-06, + "loss": 0.05419921875, + "step": 11918 + }, + { + "epoch": 0.8056644585642828, + "grad_norm": 0.5043603777885437, + "learning_rate": 2.9028702891314408e-06, + "loss": 0.09233856201171875, + "step": 11919 + }, + { + "epoch": 0.8057320535352169, + "grad_norm": 0.8508709073066711, + "learning_rate": 2.9009196759945716e-06, + "loss": 0.105072021484375, + "step": 11920 + }, + { + "epoch": 0.8057996485061512, + "grad_norm": 0.7638302445411682, + "learning_rate": 2.8989696482864446e-06, + "loss": 0.1115264892578125, + "step": 11921 + }, + { + "epoch": 0.8058672434770853, + "grad_norm": 1.141209363937378, + "learning_rate": 2.897020206101426e-06, + "loss": 0.1370849609375, + "step": 11922 + }, + { + "epoch": 0.8059348384480195, + "grad_norm": 0.24227996170520782, + "learning_rate": 2.895071349533837e-06, + "loss": 0.03336334228515625, + "step": 11923 + }, + { + "epoch": 0.8060024334189536, + "grad_norm": 0.6137807965278625, + "learning_rate": 2.8931230786779757e-06, + "loss": 0.09825897216796875, + "step": 11924 + }, + { + "epoch": 0.8060700283898878, + "grad_norm": 0.8910923004150391, + "learning_rate": 2.891175393628111e-06, + "loss": 0.1492919921875, + "step": 11925 + }, + { + "epoch": 0.806137623360822, + "grad_norm": 0.4444180130958557, + "learning_rate": 2.889228294478485e-06, + "loss": 0.098297119140625, + "step": 11926 + }, + { + "epoch": 0.8062052183317561, + "grad_norm": 1.3657218217849731, + "learning_rate": 2.8872817813233067e-06, + "loss": 0.182373046875, + "step": 11927 + }, + { + "epoch": 0.8062728133026903, + "grad_norm": 0.29333019256591797, + "learning_rate": 2.8853358542567725e-06, + "loss": 0.05321502685546875, + "step": 11928 + }, + { + "epoch": 0.8063404082736244, + "grad_norm": 0.8255020380020142, + "learning_rate": 2.883390513373021e-06, + "loss": 0.10782623291015625, + "step": 11929 + }, + { + "epoch": 0.8064080032445586, + "grad_norm": 0.7027264833450317, + "learning_rate": 2.881445758766196e-06, + "loss": 0.124176025390625, + "step": 11930 + }, + { + "epoch": 0.8064755982154928, + "grad_norm": 0.4221940040588379, + "learning_rate": 2.8795015905303834e-06, + "loss": 0.06875991821289062, + "step": 11931 + }, + { + "epoch": 0.806543193186427, + "grad_norm": 0.819140613079071, + "learning_rate": 2.8775580087596635e-06, + "loss": 0.12583160400390625, + "step": 11932 + }, + { + "epoch": 0.8066107881573611, + "grad_norm": 0.9055616855621338, + "learning_rate": 2.87561501354808e-06, + "loss": 0.1408538818359375, + "step": 11933 + }, + { + "epoch": 0.8066783831282952, + "grad_norm": 1.1929967403411865, + "learning_rate": 2.8736726049896363e-06, + "loss": 0.12999725341796875, + "step": 11934 + }, + { + "epoch": 0.8067459780992294, + "grad_norm": 1.2151811122894287, + "learning_rate": 2.871730783178326e-06, + "loss": 0.202789306640625, + "step": 11935 + }, + { + "epoch": 0.8068135730701635, + "grad_norm": 0.895693302154541, + "learning_rate": 2.8697895482081073e-06, + "loss": 0.15362548828125, + "step": 11936 + }, + { + "epoch": 0.8068811680410978, + "grad_norm": 0.2714342772960663, + "learning_rate": 2.8678489001729068e-06, + "loss": 0.046539306640625, + "step": 11937 + }, + { + "epoch": 0.8069487630120319, + "grad_norm": 0.38737615942955017, + "learning_rate": 2.8659088391666253e-06, + "loss": 0.07652664184570312, + "step": 11938 + }, + { + "epoch": 0.8070163579829661, + "grad_norm": 0.5503314733505249, + "learning_rate": 2.863969365283136e-06, + "loss": 0.078521728515625, + "step": 11939 + }, + { + "epoch": 0.8070839529539002, + "grad_norm": 0.8068704009056091, + "learning_rate": 2.862030478616277e-06, + "loss": 0.13700485229492188, + "step": 11940 + }, + { + "epoch": 0.8071515479248343, + "grad_norm": 0.8793473839759827, + "learning_rate": 2.8600921792598782e-06, + "loss": 0.12166976928710938, + "step": 11941 + }, + { + "epoch": 0.8072191428957686, + "grad_norm": 0.7634309530258179, + "learning_rate": 2.858154467307708e-06, + "loss": 0.1393585205078125, + "step": 11942 + }, + { + "epoch": 0.8072867378667027, + "grad_norm": 0.7984532713890076, + "learning_rate": 2.8562173428535363e-06, + "loss": 0.130096435546875, + "step": 11943 + }, + { + "epoch": 0.8073543328376369, + "grad_norm": 0.6443790197372437, + "learning_rate": 2.854280805991097e-06, + "loss": 0.1014251708984375, + "step": 11944 + }, + { + "epoch": 0.807421927808571, + "grad_norm": 0.509346067905426, + "learning_rate": 2.8523448568140777e-06, + "loss": 0.10216522216796875, + "step": 11945 + }, + { + "epoch": 0.8074895227795053, + "grad_norm": 0.24115140736103058, + "learning_rate": 2.850409495416167e-06, + "loss": 0.04146575927734375, + "step": 11946 + }, + { + "epoch": 0.8075571177504394, + "grad_norm": 0.5551262497901917, + "learning_rate": 2.8484747218909952e-06, + "loss": 0.09958648681640625, + "step": 11947 + }, + { + "epoch": 0.8076247127213735, + "grad_norm": 0.41864147782325745, + "learning_rate": 2.846540536332189e-06, + "loss": 0.07440567016601562, + "step": 11948 + }, + { + "epoch": 0.8076923076923077, + "grad_norm": 0.330493688583374, + "learning_rate": 2.8446069388333333e-06, + "loss": 0.05255889892578125, + "step": 11949 + }, + { + "epoch": 0.8077599026632418, + "grad_norm": 0.26412948966026306, + "learning_rate": 2.842673929487989e-06, + "loss": 0.04047393798828125, + "step": 11950 + }, + { + "epoch": 0.807827497634176, + "grad_norm": 0.6963129639625549, + "learning_rate": 2.840741508389684e-06, + "loss": 0.140350341796875, + "step": 11951 + }, + { + "epoch": 0.8078950926051102, + "grad_norm": 0.29177388548851013, + "learning_rate": 2.8388096756319216e-06, + "loss": 0.05907440185546875, + "step": 11952 + }, + { + "epoch": 0.8079626875760444, + "grad_norm": 0.7729612588882446, + "learning_rate": 2.836878431308174e-06, + "loss": 0.12140655517578125, + "step": 11953 + }, + { + "epoch": 0.8080302825469785, + "grad_norm": 2.0190460681915283, + "learning_rate": 2.8349477755118984e-06, + "loss": 0.117584228515625, + "step": 11954 + }, + { + "epoch": 0.8080978775179126, + "grad_norm": 1.8496965169906616, + "learning_rate": 2.8330177083364983e-06, + "loss": 0.128448486328125, + "step": 11955 + }, + { + "epoch": 0.8081654724888468, + "grad_norm": 0.25519034266471863, + "learning_rate": 2.8310882298753622e-06, + "loss": 0.028228759765625, + "step": 11956 + }, + { + "epoch": 0.808233067459781, + "grad_norm": 0.7592877745628357, + "learning_rate": 2.8291593402218636e-06, + "loss": 0.12261199951171875, + "step": 11957 + }, + { + "epoch": 0.8083006624307152, + "grad_norm": 1.0669294595718384, + "learning_rate": 2.827231039469318e-06, + "loss": 0.20001220703125, + "step": 11958 + }, + { + "epoch": 0.8083682574016493, + "grad_norm": 0.5923193693161011, + "learning_rate": 2.8253033277110407e-06, + "loss": 0.07292556762695312, + "step": 11959 + }, + { + "epoch": 0.8084358523725835, + "grad_norm": 0.6561381816864014, + "learning_rate": 2.823376205040302e-06, + "loss": 0.1283111572265625, + "step": 11960 + }, + { + "epoch": 0.8085034473435176, + "grad_norm": 1.145201325416565, + "learning_rate": 2.821449671550348e-06, + "loss": 0.11655044555664062, + "step": 11961 + }, + { + "epoch": 0.8085710423144518, + "grad_norm": 1.2977644205093384, + "learning_rate": 2.8195237273343964e-06, + "loss": 0.235504150390625, + "step": 11962 + }, + { + "epoch": 0.808638637285386, + "grad_norm": 0.6977248787879944, + "learning_rate": 2.817598372485637e-06, + "loss": 0.1595916748046875, + "step": 11963 + }, + { + "epoch": 0.8087062322563201, + "grad_norm": 0.41617342829704285, + "learning_rate": 2.8156736070972276e-06, + "loss": 0.074249267578125, + "step": 11964 + }, + { + "epoch": 0.8087738272272543, + "grad_norm": 0.6176397800445557, + "learning_rate": 2.8137494312623046e-06, + "loss": 0.0727853775024414, + "step": 11965 + }, + { + "epoch": 0.8088414221981884, + "grad_norm": 0.4977167844772339, + "learning_rate": 2.8118258450739663e-06, + "loss": 0.0978546142578125, + "step": 11966 + }, + { + "epoch": 0.8089090171691227, + "grad_norm": 0.6158702373504639, + "learning_rate": 2.809902848625297e-06, + "loss": 0.154266357421875, + "step": 11967 + }, + { + "epoch": 0.8089766121400568, + "grad_norm": 0.8189990520477295, + "learning_rate": 2.807980442009335e-06, + "loss": 0.1269512176513672, + "step": 11968 + }, + { + "epoch": 0.8090442071109909, + "grad_norm": 1.248844027519226, + "learning_rate": 2.806058625319095e-06, + "loss": 0.1492462158203125, + "step": 11969 + }, + { + "epoch": 0.8091118020819251, + "grad_norm": 0.5073579549789429, + "learning_rate": 2.804137398647582e-06, + "loss": 0.106842041015625, + "step": 11970 + }, + { + "epoch": 0.8091793970528592, + "grad_norm": 0.46905332803726196, + "learning_rate": 2.802216762087737e-06, + "loss": 0.1092071533203125, + "step": 11971 + }, + { + "epoch": 0.8092469920237935, + "grad_norm": 0.4589821398258209, + "learning_rate": 2.800296715732507e-06, + "loss": 0.0928497314453125, + "step": 11972 + }, + { + "epoch": 0.8093145869947276, + "grad_norm": 0.380696564912796, + "learning_rate": 2.7983772596747913e-06, + "loss": 0.06040191650390625, + "step": 11973 + }, + { + "epoch": 0.8093821819656618, + "grad_norm": 0.9217743873596191, + "learning_rate": 2.7964583940074656e-06, + "loss": 0.1818695068359375, + "step": 11974 + }, + { + "epoch": 0.8094497769365959, + "grad_norm": 0.6212442517280579, + "learning_rate": 2.7945401188233738e-06, + "loss": 0.08657455444335938, + "step": 11975 + }, + { + "epoch": 0.80951737190753, + "grad_norm": 0.669558584690094, + "learning_rate": 2.7926224342153365e-06, + "loss": 0.126068115234375, + "step": 11976 + }, + { + "epoch": 0.8095849668784643, + "grad_norm": 0.1610986739397049, + "learning_rate": 2.7907053402761435e-06, + "loss": 0.015224456787109375, + "step": 11977 + }, + { + "epoch": 0.8096525618493984, + "grad_norm": 0.5467836260795593, + "learning_rate": 2.7887888370985548e-06, + "loss": 0.05241584777832031, + "step": 11978 + }, + { + "epoch": 0.8097201568203326, + "grad_norm": 0.6093137264251709, + "learning_rate": 2.7868729247753032e-06, + "loss": 0.12767791748046875, + "step": 11979 + }, + { + "epoch": 0.8097877517912667, + "grad_norm": 1.4763013124465942, + "learning_rate": 2.7849576033990915e-06, + "loss": 0.19580078125, + "step": 11980 + }, + { + "epoch": 0.809855346762201, + "grad_norm": 0.5431244969367981, + "learning_rate": 2.783042873062595e-06, + "loss": 0.07241058349609375, + "step": 11981 + }, + { + "epoch": 0.8099229417331351, + "grad_norm": 0.7315638065338135, + "learning_rate": 2.7811287338584584e-06, + "loss": 0.125091552734375, + "step": 11982 + }, + { + "epoch": 0.8099905367040692, + "grad_norm": 0.23358522355556488, + "learning_rate": 2.779215185879309e-06, + "loss": 0.04131317138671875, + "step": 11983 + }, + { + "epoch": 0.8100581316750034, + "grad_norm": 0.42109712958335876, + "learning_rate": 2.7773022292177214e-06, + "loss": 0.07032012939453125, + "step": 11984 + }, + { + "epoch": 0.8101257266459375, + "grad_norm": 0.8109912872314453, + "learning_rate": 2.775389863966268e-06, + "loss": 0.14495849609375, + "step": 11985 + }, + { + "epoch": 0.8101933216168717, + "grad_norm": 0.6636527180671692, + "learning_rate": 2.7734780902174763e-06, + "loss": 0.121917724609375, + "step": 11986 + }, + { + "epoch": 0.8102609165878059, + "grad_norm": 0.3758852779865265, + "learning_rate": 2.7715669080638507e-06, + "loss": 0.06371688842773438, + "step": 11987 + }, + { + "epoch": 0.8103285115587401, + "grad_norm": 1.4555248022079468, + "learning_rate": 2.7696563175978674e-06, + "loss": 0.2113037109375, + "step": 11988 + }, + { + "epoch": 0.8103961065296742, + "grad_norm": 0.2640495300292969, + "learning_rate": 2.767746318911969e-06, + "loss": 0.023164749145507812, + "step": 11989 + }, + { + "epoch": 0.8104637015006083, + "grad_norm": 0.3541126549243927, + "learning_rate": 2.7658369120985734e-06, + "loss": 0.043712615966796875, + "step": 11990 + }, + { + "epoch": 0.8105312964715425, + "grad_norm": 0.6194444894790649, + "learning_rate": 2.763928097250078e-06, + "loss": 0.12654876708984375, + "step": 11991 + }, + { + "epoch": 0.8105988914424767, + "grad_norm": 0.8144412636756897, + "learning_rate": 2.7620198744588325e-06, + "loss": 0.100860595703125, + "step": 11992 + }, + { + "epoch": 0.8106664864134109, + "grad_norm": 0.5029328465461731, + "learning_rate": 2.7601122438171734e-06, + "loss": 0.07521247863769531, + "step": 11993 + }, + { + "epoch": 0.810734081384345, + "grad_norm": 0.8763189315795898, + "learning_rate": 2.758205205417401e-06, + "loss": 0.132843017578125, + "step": 11994 + }, + { + "epoch": 0.8108016763552792, + "grad_norm": 0.7925698757171631, + "learning_rate": 2.7562987593517906e-06, + "loss": 0.11017608642578125, + "step": 11995 + }, + { + "epoch": 0.8108692713262133, + "grad_norm": 0.8088992834091187, + "learning_rate": 2.754392905712595e-06, + "loss": 0.19561767578125, + "step": 11996 + }, + { + "epoch": 0.8109368662971475, + "grad_norm": 0.3342359662055969, + "learning_rate": 2.7524876445920184e-06, + "loss": 0.06572723388671875, + "step": 11997 + }, + { + "epoch": 0.8110044612680817, + "grad_norm": 0.7510942816734314, + "learning_rate": 2.750582976082259e-06, + "loss": 0.145721435546875, + "step": 11998 + }, + { + "epoch": 0.8110720562390158, + "grad_norm": 0.63057941198349, + "learning_rate": 2.7486789002754743e-06, + "loss": 0.09313201904296875, + "step": 11999 + }, + { + "epoch": 0.81113965120995, + "grad_norm": 1.0750435590744019, + "learning_rate": 2.746775417263794e-06, + "loss": 0.15789794921875, + "step": 12000 + }, + { + "epoch": 0.8112072461808841, + "grad_norm": 0.4511137902736664, + "learning_rate": 2.7448725271393223e-06, + "loss": 0.06784820556640625, + "step": 12001 + }, + { + "epoch": 0.8112748411518184, + "grad_norm": 0.6235944628715515, + "learning_rate": 2.7429702299941305e-06, + "loss": 0.10696029663085938, + "step": 12002 + }, + { + "epoch": 0.8113424361227525, + "grad_norm": 1.113876223564148, + "learning_rate": 2.7410685259202644e-06, + "loss": 0.1762542724609375, + "step": 12003 + }, + { + "epoch": 0.8114100310936866, + "grad_norm": 0.7874533534049988, + "learning_rate": 2.73916741500974e-06, + "loss": 0.180389404296875, + "step": 12004 + }, + { + "epoch": 0.8114776260646208, + "grad_norm": 1.5713145732879639, + "learning_rate": 2.7372668973545474e-06, + "loss": 0.1504669189453125, + "step": 12005 + }, + { + "epoch": 0.8115452210355549, + "grad_norm": 0.586207926273346, + "learning_rate": 2.7353669730466373e-06, + "loss": 0.10248184204101562, + "step": 12006 + }, + { + "epoch": 0.8116128160064892, + "grad_norm": 1.3902606964111328, + "learning_rate": 2.733467642177955e-06, + "loss": 0.15856170654296875, + "step": 12007 + }, + { + "epoch": 0.8116804109774233, + "grad_norm": 0.5898807048797607, + "learning_rate": 2.7315689048403843e-06, + "loss": 0.1244354248046875, + "step": 12008 + }, + { + "epoch": 0.8117480059483574, + "grad_norm": 0.853092610836029, + "learning_rate": 2.7296707611258153e-06, + "loss": 0.134307861328125, + "step": 12009 + }, + { + "epoch": 0.8118156009192916, + "grad_norm": 0.2612263262271881, + "learning_rate": 2.7277732111260756e-06, + "loss": 0.033077239990234375, + "step": 12010 + }, + { + "epoch": 0.8118831958902257, + "grad_norm": 0.32735690474510193, + "learning_rate": 2.725876254932991e-06, + "loss": 0.05533599853515625, + "step": 12011 + }, + { + "epoch": 0.81195079086116, + "grad_norm": 0.6957637667655945, + "learning_rate": 2.7239798926383456e-06, + "loss": 0.10372543334960938, + "step": 12012 + }, + { + "epoch": 0.8120183858320941, + "grad_norm": 1.1952288150787354, + "learning_rate": 2.7220841243338955e-06, + "loss": 0.1323089599609375, + "step": 12013 + }, + { + "epoch": 0.8120859808030283, + "grad_norm": 0.927832305431366, + "learning_rate": 2.7201889501113723e-06, + "loss": 0.15240859985351562, + "step": 12014 + }, + { + "epoch": 0.8121535757739624, + "grad_norm": 0.40569737553596497, + "learning_rate": 2.7182943700624763e-06, + "loss": 0.08023834228515625, + "step": 12015 + }, + { + "epoch": 0.8122211707448965, + "grad_norm": 0.35881564021110535, + "learning_rate": 2.7164003842788755e-06, + "loss": 0.059543609619140625, + "step": 12016 + }, + { + "epoch": 0.8122887657158308, + "grad_norm": 0.3853023052215576, + "learning_rate": 2.7145069928522163e-06, + "loss": 0.0748291015625, + "step": 12017 + }, + { + "epoch": 0.8123563606867649, + "grad_norm": 1.4455126523971558, + "learning_rate": 2.7126141958741113e-06, + "loss": 0.249114990234375, + "step": 12018 + }, + { + "epoch": 0.8124239556576991, + "grad_norm": 1.0555660724639893, + "learning_rate": 2.7107219934361417e-06, + "loss": 0.1690216064453125, + "step": 12019 + }, + { + "epoch": 0.8124915506286332, + "grad_norm": 0.7367778420448303, + "learning_rate": 2.7088303856298755e-06, + "loss": 0.1321563720703125, + "step": 12020 + }, + { + "epoch": 0.8125591455995674, + "grad_norm": 0.3686806261539459, + "learning_rate": 2.706939372546827e-06, + "loss": 0.03123188018798828, + "step": 12021 + }, + { + "epoch": 0.8126267405705015, + "grad_norm": 0.839079737663269, + "learning_rate": 2.7050489542785077e-06, + "loss": 0.1275634765625, + "step": 12022 + }, + { + "epoch": 0.8126943355414357, + "grad_norm": 0.38525545597076416, + "learning_rate": 2.7031591309163744e-06, + "loss": 0.076446533203125, + "step": 12023 + }, + { + "epoch": 0.8127619305123699, + "grad_norm": 1.0308897495269775, + "learning_rate": 2.701269902551879e-06, + "loss": 0.1245880126953125, + "step": 12024 + }, + { + "epoch": 0.812829525483304, + "grad_norm": 1.040477991104126, + "learning_rate": 2.6993812692764314e-06, + "loss": 0.170989990234375, + "step": 12025 + }, + { + "epoch": 0.8128971204542382, + "grad_norm": 0.6623684763908386, + "learning_rate": 2.697493231181414e-06, + "loss": 0.11975479125976562, + "step": 12026 + }, + { + "epoch": 0.8129647154251723, + "grad_norm": 0.39426189661026, + "learning_rate": 2.695605788358183e-06, + "loss": 0.06617355346679688, + "step": 12027 + }, + { + "epoch": 0.8130323103961066, + "grad_norm": 0.570603609085083, + "learning_rate": 2.6937189408980644e-06, + "loss": 0.1066741943359375, + "step": 12028 + }, + { + "epoch": 0.8130999053670407, + "grad_norm": 0.29788491129875183, + "learning_rate": 2.691832688892355e-06, + "loss": 0.0470428466796875, + "step": 12029 + }, + { + "epoch": 0.8131675003379748, + "grad_norm": 0.3836553394794464, + "learning_rate": 2.6899470324323233e-06, + "loss": 0.06839752197265625, + "step": 12030 + }, + { + "epoch": 0.813235095308909, + "grad_norm": 0.5255441069602966, + "learning_rate": 2.6880619716092098e-06, + "loss": 0.088653564453125, + "step": 12031 + }, + { + "epoch": 0.8133026902798431, + "grad_norm": 0.6996358633041382, + "learning_rate": 2.686177506514221e-06, + "loss": 0.10962677001953125, + "step": 12032 + }, + { + "epoch": 0.8133702852507774, + "grad_norm": 0.3784797191619873, + "learning_rate": 2.684293637238552e-06, + "loss": 0.049072265625, + "step": 12033 + }, + { + "epoch": 0.8134378802217115, + "grad_norm": 0.7140653133392334, + "learning_rate": 2.68241036387334e-06, + "loss": 0.1206512451171875, + "step": 12034 + }, + { + "epoch": 0.8135054751926457, + "grad_norm": 0.7928275465965271, + "learning_rate": 2.6805276865097193e-06, + "loss": 0.13088226318359375, + "step": 12035 + }, + { + "epoch": 0.8135730701635798, + "grad_norm": 0.4019705057144165, + "learning_rate": 2.6786456052387843e-06, + "loss": 0.05947113037109375, + "step": 12036 + }, + { + "epoch": 0.8136406651345139, + "grad_norm": 1.2227572202682495, + "learning_rate": 2.6767641201515996e-06, + "loss": 0.189788818359375, + "step": 12037 + }, + { + "epoch": 0.8137082601054482, + "grad_norm": 0.5421315431594849, + "learning_rate": 2.6748832313392094e-06, + "loss": 0.130828857421875, + "step": 12038 + }, + { + "epoch": 0.8137758550763823, + "grad_norm": 0.5382332801818848, + "learning_rate": 2.673002938892608e-06, + "loss": 0.08187103271484375, + "step": 12039 + }, + { + "epoch": 0.8138434500473165, + "grad_norm": 1.386143445968628, + "learning_rate": 2.6711232429027905e-06, + "loss": 0.2177734375, + "step": 12040 + }, + { + "epoch": 0.8139110450182506, + "grad_norm": 0.6027222871780396, + "learning_rate": 2.6692441434607024e-06, + "loss": 0.1107025146484375, + "step": 12041 + }, + { + "epoch": 0.8139786399891848, + "grad_norm": 1.3308498859405518, + "learning_rate": 2.667365640657268e-06, + "loss": 0.13007354736328125, + "step": 12042 + }, + { + "epoch": 0.814046234960119, + "grad_norm": 0.2182886153459549, + "learning_rate": 2.6654877345833773e-06, + "loss": 0.032123565673828125, + "step": 12043 + }, + { + "epoch": 0.8141138299310531, + "grad_norm": 0.5075144171714783, + "learning_rate": 2.6636104253298997e-06, + "loss": 0.0827484130859375, + "step": 12044 + }, + { + "epoch": 0.8141814249019873, + "grad_norm": 0.1971336305141449, + "learning_rate": 2.661733712987663e-06, + "loss": 0.030500411987304688, + "step": 12045 + }, + { + "epoch": 0.8142490198729214, + "grad_norm": 0.7709094882011414, + "learning_rate": 2.659857597647489e-06, + "loss": 0.11077117919921875, + "step": 12046 + }, + { + "epoch": 0.8143166148438556, + "grad_norm": 0.8242018222808838, + "learning_rate": 2.6579820794001397e-06, + "loss": 0.1311798095703125, + "step": 12047 + }, + { + "epoch": 0.8143842098147898, + "grad_norm": 0.7174562215805054, + "learning_rate": 2.6561071583363748e-06, + "loss": 0.08751678466796875, + "step": 12048 + }, + { + "epoch": 0.814451804785724, + "grad_norm": 0.41003960371017456, + "learning_rate": 2.6542328345469154e-06, + "loss": 0.0814208984375, + "step": 12049 + }, + { + "epoch": 0.8145193997566581, + "grad_norm": 0.712781548500061, + "learning_rate": 2.6523591081224413e-06, + "loss": 0.0965118408203125, + "step": 12050 + }, + { + "epoch": 0.8145869947275922, + "grad_norm": 0.9108577370643616, + "learning_rate": 2.650485979153627e-06, + "loss": 0.1744384765625, + "step": 12051 + }, + { + "epoch": 0.8146545896985264, + "grad_norm": 0.6367635130882263, + "learning_rate": 2.648613447731101e-06, + "loss": 0.133453369140625, + "step": 12052 + }, + { + "epoch": 0.8147221846694606, + "grad_norm": 0.9128276705741882, + "learning_rate": 2.6467415139454693e-06, + "loss": 0.12309646606445312, + "step": 12053 + }, + { + "epoch": 0.8147897796403948, + "grad_norm": 0.7796016335487366, + "learning_rate": 2.644870177887307e-06, + "loss": 0.1391143798828125, + "step": 12054 + }, + { + "epoch": 0.8148573746113289, + "grad_norm": 1.7140700817108154, + "learning_rate": 2.6429994396471612e-06, + "loss": 0.16632080078125, + "step": 12055 + }, + { + "epoch": 0.8149249695822631, + "grad_norm": 0.3713087737560272, + "learning_rate": 2.64112929931555e-06, + "loss": 0.077911376953125, + "step": 12056 + }, + { + "epoch": 0.8149925645531972, + "grad_norm": 0.4837830364704132, + "learning_rate": 2.6392597569829623e-06, + "loss": 0.0854644775390625, + "step": 12057 + }, + { + "epoch": 0.8150601595241314, + "grad_norm": 1.1302670240402222, + "learning_rate": 2.6373908127398545e-06, + "loss": 0.12757110595703125, + "step": 12058 + }, + { + "epoch": 0.8151277544950656, + "grad_norm": 1.1907716989517212, + "learning_rate": 2.6355224666766688e-06, + "loss": 0.1748199462890625, + "step": 12059 + }, + { + "epoch": 0.8151953494659997, + "grad_norm": 0.4631696045398712, + "learning_rate": 2.6336547188837926e-06, + "loss": 0.06196784973144531, + "step": 12060 + }, + { + "epoch": 0.8152629444369339, + "grad_norm": 0.3524347245693207, + "learning_rate": 2.6317875694516113e-06, + "loss": 0.05387115478515625, + "step": 12061 + }, + { + "epoch": 0.815330539407868, + "grad_norm": 0.45164602994918823, + "learning_rate": 2.629921018470467e-06, + "loss": 0.0852813720703125, + "step": 12062 + }, + { + "epoch": 0.8153981343788023, + "grad_norm": 0.3960510790348053, + "learning_rate": 2.6280550660306657e-06, + "loss": 0.08050537109375, + "step": 12063 + }, + { + "epoch": 0.8154657293497364, + "grad_norm": 0.8925312757492065, + "learning_rate": 2.6261897122225036e-06, + "loss": 0.1411285400390625, + "step": 12064 + }, + { + "epoch": 0.8155333243206705, + "grad_norm": 0.735984742641449, + "learning_rate": 2.6243249571362353e-06, + "loss": 0.08705902099609375, + "step": 12065 + }, + { + "epoch": 0.8156009192916047, + "grad_norm": 1.0451675653457642, + "learning_rate": 2.6224608008620885e-06, + "loss": 0.1286773681640625, + "step": 12066 + }, + { + "epoch": 0.8156685142625388, + "grad_norm": 0.35697612166404724, + "learning_rate": 2.6205972434902646e-06, + "loss": 0.05263519287109375, + "step": 12067 + }, + { + "epoch": 0.8157361092334731, + "grad_norm": 0.5320385098457336, + "learning_rate": 2.618734285110933e-06, + "loss": 0.07523345947265625, + "step": 12068 + }, + { + "epoch": 0.8158037042044072, + "grad_norm": 0.34287217259407043, + "learning_rate": 2.6168719258142333e-06, + "loss": 0.047275543212890625, + "step": 12069 + }, + { + "epoch": 0.8158712991753414, + "grad_norm": 0.6626389026641846, + "learning_rate": 2.6150101656902796e-06, + "loss": 0.127838134765625, + "step": 12070 + }, + { + "epoch": 0.8159388941462755, + "grad_norm": 0.7929197549819946, + "learning_rate": 2.613149004829154e-06, + "loss": 0.186614990234375, + "step": 12071 + }, + { + "epoch": 0.8160064891172096, + "grad_norm": 0.684385359287262, + "learning_rate": 2.6112884433209188e-06, + "loss": 0.122833251953125, + "step": 12072 + }, + { + "epoch": 0.8160740840881439, + "grad_norm": 0.7110586166381836, + "learning_rate": 2.609428481255589e-06, + "loss": 0.11315155029296875, + "step": 12073 + }, + { + "epoch": 0.816141679059078, + "grad_norm": 0.3707224428653717, + "learning_rate": 2.6075691187231627e-06, + "loss": 0.060924530029296875, + "step": 12074 + }, + { + "epoch": 0.8162092740300122, + "grad_norm": 0.7561966776847839, + "learning_rate": 2.6057103558136157e-06, + "loss": 0.1433258056640625, + "step": 12075 + }, + { + "epoch": 0.8162768690009463, + "grad_norm": 0.6490762829780579, + "learning_rate": 2.603852192616875e-06, + "loss": 0.0898590087890625, + "step": 12076 + }, + { + "epoch": 0.8163444639718805, + "grad_norm": 0.24914979934692383, + "learning_rate": 2.6019946292228604e-06, + "loss": 0.032928466796875, + "step": 12077 + }, + { + "epoch": 0.8164120589428147, + "grad_norm": 1.0769634246826172, + "learning_rate": 2.600137665721446e-06, + "loss": 0.1326446533203125, + "step": 12078 + }, + { + "epoch": 0.8164796539137488, + "grad_norm": 0.48101532459259033, + "learning_rate": 2.598281302202487e-06, + "loss": 0.08666229248046875, + "step": 12079 + }, + { + "epoch": 0.816547248884683, + "grad_norm": 0.48935210704803467, + "learning_rate": 2.596425538755803e-06, + "loss": 0.0716705322265625, + "step": 12080 + }, + { + "epoch": 0.8166148438556171, + "grad_norm": 0.5833423137664795, + "learning_rate": 2.5945703754711895e-06, + "loss": 0.11200714111328125, + "step": 12081 + }, + { + "epoch": 0.8166824388265513, + "grad_norm": 0.2762545347213745, + "learning_rate": 2.592715812438406e-06, + "loss": 0.03874969482421875, + "step": 12082 + }, + { + "epoch": 0.8167500337974855, + "grad_norm": 0.4058223068714142, + "learning_rate": 2.590861849747197e-06, + "loss": 0.0828857421875, + "step": 12083 + }, + { + "epoch": 0.8168176287684197, + "grad_norm": 1.0937620401382446, + "learning_rate": 2.589008487487261e-06, + "loss": 0.1478424072265625, + "step": 12084 + }, + { + "epoch": 0.8168852237393538, + "grad_norm": 0.6651003956794739, + "learning_rate": 2.587155725748278e-06, + "loss": 0.12172698974609375, + "step": 12085 + }, + { + "epoch": 0.8169528187102879, + "grad_norm": 0.3998529613018036, + "learning_rate": 2.5853035646198946e-06, + "loss": 0.05098724365234375, + "step": 12086 + }, + { + "epoch": 0.8170204136812221, + "grad_norm": 0.7644206285476685, + "learning_rate": 2.583452004191729e-06, + "loss": 0.148712158203125, + "step": 12087 + }, + { + "epoch": 0.8170880086521563, + "grad_norm": 0.35150229930877686, + "learning_rate": 2.5816010445533784e-06, + "loss": 0.05437469482421875, + "step": 12088 + }, + { + "epoch": 0.8171556036230905, + "grad_norm": 0.3931575119495392, + "learning_rate": 2.579750685794392e-06, + "loss": 0.06706047058105469, + "step": 12089 + }, + { + "epoch": 0.8172231985940246, + "grad_norm": 0.26093462109565735, + "learning_rate": 2.577900928004312e-06, + "loss": 0.04845428466796875, + "step": 12090 + }, + { + "epoch": 0.8172907935649588, + "grad_norm": 0.8016977906227112, + "learning_rate": 2.576051771272637e-06, + "loss": 0.1571197509765625, + "step": 12091 + }, + { + "epoch": 0.8173583885358929, + "grad_norm": 0.6200262308120728, + "learning_rate": 2.5742032156888413e-06, + "loss": 0.14996337890625, + "step": 12092 + }, + { + "epoch": 0.817425983506827, + "grad_norm": 0.5458573698997498, + "learning_rate": 2.572355261342369e-06, + "loss": 0.07805633544921875, + "step": 12093 + }, + { + "epoch": 0.8174935784777613, + "grad_norm": 1.2988560199737549, + "learning_rate": 2.5705079083226366e-06, + "loss": 0.1833343505859375, + "step": 12094 + }, + { + "epoch": 0.8175611734486954, + "grad_norm": 0.6224924325942993, + "learning_rate": 2.568661156719025e-06, + "loss": 0.10302352905273438, + "step": 12095 + }, + { + "epoch": 0.8176287684196296, + "grad_norm": 0.6428162455558777, + "learning_rate": 2.5668150066209033e-06, + "loss": 0.10239410400390625, + "step": 12096 + }, + { + "epoch": 0.8176963633905637, + "grad_norm": 0.19806087017059326, + "learning_rate": 2.564969458117591e-06, + "loss": 0.030202865600585938, + "step": 12097 + }, + { + "epoch": 0.817763958361498, + "grad_norm": 1.0689520835876465, + "learning_rate": 2.563124511298384e-06, + "loss": 0.159576416015625, + "step": 12098 + }, + { + "epoch": 0.8178315533324321, + "grad_norm": 0.36460164189338684, + "learning_rate": 2.561280166252564e-06, + "loss": 0.0711212158203125, + "step": 12099 + }, + { + "epoch": 0.8178991483033662, + "grad_norm": 0.7851198315620422, + "learning_rate": 2.5594364230693577e-06, + "loss": 0.148590087890625, + "step": 12100 + }, + { + "epoch": 0.8179667432743004, + "grad_norm": 0.46909743547439575, + "learning_rate": 2.557593281837994e-06, + "loss": 0.07105255126953125, + "step": 12101 + }, + { + "epoch": 0.8180343382452345, + "grad_norm": 0.4344313144683838, + "learning_rate": 2.5557507426476367e-06, + "loss": 0.0884552001953125, + "step": 12102 + }, + { + "epoch": 0.8181019332161688, + "grad_norm": 1.4736500978469849, + "learning_rate": 2.553908805587452e-06, + "loss": 0.189056396484375, + "step": 12103 + }, + { + "epoch": 0.8181695281871029, + "grad_norm": 1.3652081489562988, + "learning_rate": 2.55206747074656e-06, + "loss": 0.15196990966796875, + "step": 12104 + }, + { + "epoch": 0.8182371231580371, + "grad_norm": 0.24849262833595276, + "learning_rate": 2.5502267382140583e-06, + "loss": 0.05323028564453125, + "step": 12105 + }, + { + "epoch": 0.8183047181289712, + "grad_norm": 0.4411906599998474, + "learning_rate": 2.5483866080790097e-06, + "loss": 0.07184600830078125, + "step": 12106 + }, + { + "epoch": 0.8183723130999053, + "grad_norm": 0.3006114363670349, + "learning_rate": 2.5465470804304535e-06, + "loss": 0.03945159912109375, + "step": 12107 + }, + { + "epoch": 0.8184399080708396, + "grad_norm": 0.400327205657959, + "learning_rate": 2.544708155357397e-06, + "loss": 0.07427978515625, + "step": 12108 + }, + { + "epoch": 0.8185075030417737, + "grad_norm": 0.8744378089904785, + "learning_rate": 2.542869832948819e-06, + "loss": 0.1161041259765625, + "step": 12109 + }, + { + "epoch": 0.8185750980127079, + "grad_norm": 1.1568671464920044, + "learning_rate": 2.5410321132936674e-06, + "loss": 0.1754150390625, + "step": 12110 + }, + { + "epoch": 0.818642692983642, + "grad_norm": 0.7055901885032654, + "learning_rate": 2.539194996480861e-06, + "loss": 0.11556243896484375, + "step": 12111 + }, + { + "epoch": 0.8187102879545762, + "grad_norm": 0.4650174379348755, + "learning_rate": 2.537358482599301e-06, + "loss": 0.09765625, + "step": 12112 + }, + { + "epoch": 0.8187778829255103, + "grad_norm": 1.4449124336242676, + "learning_rate": 2.5355225717378342e-06, + "loss": 0.183258056640625, + "step": 12113 + }, + { + "epoch": 0.8188454778964445, + "grad_norm": 0.3750022053718567, + "learning_rate": 2.5336872639853077e-06, + "loss": 0.05022239685058594, + "step": 12114 + }, + { + "epoch": 0.8189130728673787, + "grad_norm": 0.22837470471858978, + "learning_rate": 2.5318525594305124e-06, + "loss": 0.030185699462890625, + "step": 12115 + }, + { + "epoch": 0.8189806678383128, + "grad_norm": 0.49021026492118835, + "learning_rate": 2.5300184581622315e-06, + "loss": 0.1036224365234375, + "step": 12116 + }, + { + "epoch": 0.819048262809247, + "grad_norm": 1.21903657913208, + "learning_rate": 2.5281849602692085e-06, + "loss": 0.16326904296875, + "step": 12117 + }, + { + "epoch": 0.8191158577801811, + "grad_norm": 0.5242271423339844, + "learning_rate": 2.5263520658401577e-06, + "loss": 0.08808135986328125, + "step": 12118 + }, + { + "epoch": 0.8191834527511154, + "grad_norm": 0.8640698194503784, + "learning_rate": 2.5245197749637665e-06, + "loss": 0.1225738525390625, + "step": 12119 + }, + { + "epoch": 0.8192510477220495, + "grad_norm": 0.5664605498313904, + "learning_rate": 2.522688087728694e-06, + "loss": 0.09502410888671875, + "step": 12120 + }, + { + "epoch": 0.8193186426929836, + "grad_norm": 0.378298282623291, + "learning_rate": 2.5208570042235673e-06, + "loss": 0.05209064483642578, + "step": 12121 + }, + { + "epoch": 0.8193862376639178, + "grad_norm": 0.7779760956764221, + "learning_rate": 2.519026524536984e-06, + "loss": 0.177459716796875, + "step": 12122 + }, + { + "epoch": 0.8194538326348519, + "grad_norm": 0.27683117985725403, + "learning_rate": 2.517196648757517e-06, + "loss": 0.042308807373046875, + "step": 12123 + }, + { + "epoch": 0.8195214276057862, + "grad_norm": 0.8080636262893677, + "learning_rate": 2.5153673769737032e-06, + "loss": 0.12426948547363281, + "step": 12124 + }, + { + "epoch": 0.8195890225767203, + "grad_norm": 1.073209524154663, + "learning_rate": 2.5135387092740625e-06, + "loss": 0.138580322265625, + "step": 12125 + }, + { + "epoch": 0.8196566175476545, + "grad_norm": 0.6996605396270752, + "learning_rate": 2.5117106457470653e-06, + "loss": 0.12871551513671875, + "step": 12126 + }, + { + "epoch": 0.8197242125185886, + "grad_norm": 0.878935694694519, + "learning_rate": 2.5098831864811737e-06, + "loss": 0.13885498046875, + "step": 12127 + }, + { + "epoch": 0.8197918074895227, + "grad_norm": 0.9171041250228882, + "learning_rate": 2.50805633156481e-06, + "loss": 0.1876220703125, + "step": 12128 + }, + { + "epoch": 0.819859402460457, + "grad_norm": 1.1277716159820557, + "learning_rate": 2.506230081086365e-06, + "loss": 0.1350860595703125, + "step": 12129 + }, + { + "epoch": 0.8199269974313911, + "grad_norm": 0.7585632801055908, + "learning_rate": 2.5044044351342083e-06, + "loss": 0.09653091430664062, + "step": 12130 + }, + { + "epoch": 0.8199945924023253, + "grad_norm": 0.1950192153453827, + "learning_rate": 2.5025793937966744e-06, + "loss": 0.027545928955078125, + "step": 12131 + }, + { + "epoch": 0.8200621873732594, + "grad_norm": 0.559992790222168, + "learning_rate": 2.5007549571620703e-06, + "loss": 0.1186981201171875, + "step": 12132 + }, + { + "epoch": 0.8201297823441936, + "grad_norm": 0.23345281183719635, + "learning_rate": 2.498931125318673e-06, + "loss": 0.034934043884277344, + "step": 12133 + }, + { + "epoch": 0.8201973773151278, + "grad_norm": 1.4941344261169434, + "learning_rate": 2.497107898354731e-06, + "loss": 0.159698486328125, + "step": 12134 + }, + { + "epoch": 0.8202649722860619, + "grad_norm": 0.6525231003761292, + "learning_rate": 2.4952852763584627e-06, + "loss": 0.122528076171875, + "step": 12135 + }, + { + "epoch": 0.8203325672569961, + "grad_norm": 0.8004837036132812, + "learning_rate": 2.49346325941806e-06, + "loss": 0.12633132934570312, + "step": 12136 + }, + { + "epoch": 0.8204001622279302, + "grad_norm": 0.23995345830917358, + "learning_rate": 2.491641847621679e-06, + "loss": 0.034679412841796875, + "step": 12137 + }, + { + "epoch": 0.8204677571988644, + "grad_norm": 0.8829606175422668, + "learning_rate": 2.489821041057459e-06, + "loss": 0.168212890625, + "step": 12138 + }, + { + "epoch": 0.8205353521697986, + "grad_norm": 1.5709697008132935, + "learning_rate": 2.488000839813491e-06, + "loss": 0.1596221923828125, + "step": 12139 + }, + { + "epoch": 0.8206029471407327, + "grad_norm": 0.8411928415298462, + "learning_rate": 2.4861812439778587e-06, + "loss": 0.135467529296875, + "step": 12140 + }, + { + "epoch": 0.8206705421116669, + "grad_norm": 0.20613902807235718, + "learning_rate": 2.484362253638599e-06, + "loss": 0.043060302734375, + "step": 12141 + }, + { + "epoch": 0.820738137082601, + "grad_norm": 0.7021210789680481, + "learning_rate": 2.482543868883727e-06, + "loss": 0.10253143310546875, + "step": 12142 + }, + { + "epoch": 0.8208057320535352, + "grad_norm": 0.6037269830703735, + "learning_rate": 2.4807260898012325e-06, + "loss": 0.0998077392578125, + "step": 12143 + }, + { + "epoch": 0.8208733270244694, + "grad_norm": 0.42525994777679443, + "learning_rate": 2.478908916479058e-06, + "loss": 0.06798553466796875, + "step": 12144 + }, + { + "epoch": 0.8209409219954036, + "grad_norm": 0.49349167943000793, + "learning_rate": 2.4770923490051432e-06, + "loss": 0.0691986083984375, + "step": 12145 + }, + { + "epoch": 0.8210085169663377, + "grad_norm": 0.8701401948928833, + "learning_rate": 2.4752763874673782e-06, + "loss": 0.12105560302734375, + "step": 12146 + }, + { + "epoch": 0.8210761119372718, + "grad_norm": 0.5681114792823792, + "learning_rate": 2.4734610319536328e-06, + "loss": 0.08690643310546875, + "step": 12147 + }, + { + "epoch": 0.821143706908206, + "grad_norm": 0.7945037484169006, + "learning_rate": 2.4716462825517426e-06, + "loss": 0.126129150390625, + "step": 12148 + }, + { + "epoch": 0.8212113018791402, + "grad_norm": 0.7221956849098206, + "learning_rate": 2.46983213934952e-06, + "loss": 0.09497642517089844, + "step": 12149 + }, + { + "epoch": 0.8212788968500744, + "grad_norm": 0.39918196201324463, + "learning_rate": 2.468018602434739e-06, + "loss": 0.06818771362304688, + "step": 12150 + }, + { + "epoch": 0.8213464918210085, + "grad_norm": 1.0933107137680054, + "learning_rate": 2.4662056718951615e-06, + "loss": 0.180511474609375, + "step": 12151 + }, + { + "epoch": 0.8214140867919427, + "grad_norm": 0.41236698627471924, + "learning_rate": 2.464393347818492e-06, + "loss": 0.0697021484375, + "step": 12152 + }, + { + "epoch": 0.8214816817628768, + "grad_norm": 1.498746395111084, + "learning_rate": 2.4625816302924343e-06, + "loss": 0.11682891845703125, + "step": 12153 + }, + { + "epoch": 0.821549276733811, + "grad_norm": 1.1068367958068848, + "learning_rate": 2.460770519404651e-06, + "loss": 0.1321868896484375, + "step": 12154 + }, + { + "epoch": 0.8216168717047452, + "grad_norm": 0.3758634030818939, + "learning_rate": 2.458960015242762e-06, + "loss": 0.05393409729003906, + "step": 12155 + }, + { + "epoch": 0.8216844666756793, + "grad_norm": 0.33282336592674255, + "learning_rate": 2.4571501178943844e-06, + "loss": 0.0587921142578125, + "step": 12156 + }, + { + "epoch": 0.8217520616466135, + "grad_norm": 0.7849147915840149, + "learning_rate": 2.455340827447086e-06, + "loss": 0.1667022705078125, + "step": 12157 + }, + { + "epoch": 0.8218196566175476, + "grad_norm": 0.811514139175415, + "learning_rate": 2.453532143988414e-06, + "loss": 0.1674346923828125, + "step": 12158 + }, + { + "epoch": 0.8218872515884819, + "grad_norm": 1.096082091331482, + "learning_rate": 2.451724067605881e-06, + "loss": 0.142242431640625, + "step": 12159 + }, + { + "epoch": 0.821954846559416, + "grad_norm": 0.8885732293128967, + "learning_rate": 2.449916598386976e-06, + "loss": 0.10474395751953125, + "step": 12160 + }, + { + "epoch": 0.8220224415303501, + "grad_norm": 0.43064406514167786, + "learning_rate": 2.4481097364191535e-06, + "loss": 0.10319900512695312, + "step": 12161 + }, + { + "epoch": 0.8220900365012843, + "grad_norm": 0.6955626606941223, + "learning_rate": 2.4463034817898404e-06, + "loss": 0.12801361083984375, + "step": 12162 + }, + { + "epoch": 0.8221576314722184, + "grad_norm": 0.8774670958518982, + "learning_rate": 2.4444978345864323e-06, + "loss": 0.12006378173828125, + "step": 12163 + }, + { + "epoch": 0.8222252264431527, + "grad_norm": 0.37921032309532166, + "learning_rate": 2.4426927948963085e-06, + "loss": 0.0624847412109375, + "step": 12164 + }, + { + "epoch": 0.8222928214140868, + "grad_norm": 0.288742333650589, + "learning_rate": 2.440888362806793e-06, + "loss": 0.055225372314453125, + "step": 12165 + }, + { + "epoch": 0.822360416385021, + "grad_norm": 0.9346643686294556, + "learning_rate": 2.4390845384052075e-06, + "loss": 0.1907958984375, + "step": 12166 + }, + { + "epoch": 0.8224280113559551, + "grad_norm": 0.35418301820755005, + "learning_rate": 2.4372813217788305e-06, + "loss": 0.03321075439453125, + "step": 12167 + }, + { + "epoch": 0.8224956063268892, + "grad_norm": 0.30325692892074585, + "learning_rate": 2.435478713014902e-06, + "loss": 0.05449676513671875, + "step": 12168 + }, + { + "epoch": 0.8225632012978235, + "grad_norm": 0.50440514087677, + "learning_rate": 2.4336767122006544e-06, + "loss": 0.08425140380859375, + "step": 12169 + }, + { + "epoch": 0.8226307962687576, + "grad_norm": 0.18109439313411713, + "learning_rate": 2.4318753194232775e-06, + "loss": 0.028285980224609375, + "step": 12170 + }, + { + "epoch": 0.8226983912396918, + "grad_norm": 1.1444238424301147, + "learning_rate": 2.4300745347699335e-06, + "loss": 0.14017868041992188, + "step": 12171 + }, + { + "epoch": 0.8227659862106259, + "grad_norm": 0.6180599927902222, + "learning_rate": 2.428274358327754e-06, + "loss": 0.10092926025390625, + "step": 12172 + }, + { + "epoch": 0.8228335811815601, + "grad_norm": 0.41074827313423157, + "learning_rate": 2.426474790183843e-06, + "loss": 0.0671234130859375, + "step": 12173 + }, + { + "epoch": 0.8229011761524943, + "grad_norm": 0.6602770090103149, + "learning_rate": 2.424675830425274e-06, + "loss": 0.117279052734375, + "step": 12174 + }, + { + "epoch": 0.8229687711234284, + "grad_norm": 0.9907991290092468, + "learning_rate": 2.4228774791390996e-06, + "loss": 0.1664276123046875, + "step": 12175 + }, + { + "epoch": 0.8230363660943626, + "grad_norm": 0.34295040369033813, + "learning_rate": 2.421079736412321e-06, + "loss": 0.04772186279296875, + "step": 12176 + }, + { + "epoch": 0.8231039610652967, + "grad_norm": 0.2863124907016754, + "learning_rate": 2.419282602331941e-06, + "loss": 0.04360198974609375, + "step": 12177 + }, + { + "epoch": 0.8231715560362309, + "grad_norm": 0.5682650804519653, + "learning_rate": 2.417486076984903e-06, + "loss": 0.08509445190429688, + "step": 12178 + }, + { + "epoch": 0.823239151007165, + "grad_norm": 0.17853620648384094, + "learning_rate": 2.4156901604581338e-06, + "loss": 0.028001785278320312, + "step": 12179 + }, + { + "epoch": 0.8233067459780993, + "grad_norm": 0.37712377309799194, + "learning_rate": 2.4138948528385447e-06, + "loss": 0.0595245361328125, + "step": 12180 + }, + { + "epoch": 0.8233743409490334, + "grad_norm": 1.7043235301971436, + "learning_rate": 2.4121001542129874e-06, + "loss": 0.212371826171875, + "step": 12181 + }, + { + "epoch": 0.8234419359199675, + "grad_norm": 0.4101628363132477, + "learning_rate": 2.4103060646683107e-06, + "loss": 0.06633758544921875, + "step": 12182 + }, + { + "epoch": 0.8235095308909017, + "grad_norm": 1.2088021039962769, + "learning_rate": 2.4085125842913203e-06, + "loss": 0.11014556884765625, + "step": 12183 + }, + { + "epoch": 0.8235771258618358, + "grad_norm": 0.545789361000061, + "learning_rate": 2.4067197131687984e-06, + "loss": 0.1015777587890625, + "step": 12184 + }, + { + "epoch": 0.8236447208327701, + "grad_norm": 0.3600550591945648, + "learning_rate": 2.4049274513874924e-06, + "loss": 0.05438995361328125, + "step": 12185 + }, + { + "epoch": 0.8237123158037042, + "grad_norm": 0.9646144509315491, + "learning_rate": 2.4031357990341247e-06, + "loss": 0.15228271484375, + "step": 12186 + }, + { + "epoch": 0.8237799107746384, + "grad_norm": 0.25870591402053833, + "learning_rate": 2.401344756195382e-06, + "loss": 0.048095703125, + "step": 12187 + }, + { + "epoch": 0.8238475057455725, + "grad_norm": 1.117511510848999, + "learning_rate": 2.399554322957939e-06, + "loss": 0.1712799072265625, + "step": 12188 + }, + { + "epoch": 0.8239151007165066, + "grad_norm": 0.47053417563438416, + "learning_rate": 2.3977644994084145e-06, + "loss": 0.09323883056640625, + "step": 12189 + }, + { + "epoch": 0.8239826956874409, + "grad_norm": 0.5686004161834717, + "learning_rate": 2.395975285633411e-06, + "loss": 0.07755088806152344, + "step": 12190 + }, + { + "epoch": 0.824050290658375, + "grad_norm": 0.4493178427219391, + "learning_rate": 2.3941866817195157e-06, + "loss": 0.05926513671875, + "step": 12191 + }, + { + "epoch": 0.8241178856293092, + "grad_norm": 0.7069580554962158, + "learning_rate": 2.392398687753256e-06, + "loss": 0.10803985595703125, + "step": 12192 + }, + { + "epoch": 0.8241854806002433, + "grad_norm": 1.0083611011505127, + "learning_rate": 2.3906113038211612e-06, + "loss": 0.11530303955078125, + "step": 12193 + }, + { + "epoch": 0.8242530755711776, + "grad_norm": 0.6128612756729126, + "learning_rate": 2.3888245300097004e-06, + "loss": 0.11112213134765625, + "step": 12194 + }, + { + "epoch": 0.8243206705421117, + "grad_norm": 0.2889336049556732, + "learning_rate": 2.38703836640534e-06, + "loss": 0.04459381103515625, + "step": 12195 + }, + { + "epoch": 0.8243882655130458, + "grad_norm": 0.2996717095375061, + "learning_rate": 2.3852528130945044e-06, + "loss": 0.043140411376953125, + "step": 12196 + }, + { + "epoch": 0.82445586048398, + "grad_norm": 0.2793081998825073, + "learning_rate": 2.383467870163586e-06, + "loss": 0.033077239990234375, + "step": 12197 + }, + { + "epoch": 0.8245234554549141, + "grad_norm": 1.4949939250946045, + "learning_rate": 2.381683537698955e-06, + "loss": 0.23431396484375, + "step": 12198 + }, + { + "epoch": 0.8245910504258483, + "grad_norm": 0.6372634172439575, + "learning_rate": 2.3798998157869454e-06, + "loss": 0.131072998046875, + "step": 12199 + }, + { + "epoch": 0.8246586453967825, + "grad_norm": 1.1309504508972168, + "learning_rate": 2.378116704513864e-06, + "loss": 0.1645660400390625, + "step": 12200 + }, + { + "epoch": 0.8247262403677167, + "grad_norm": 0.6233770847320557, + "learning_rate": 2.376334203965998e-06, + "loss": 0.10935211181640625, + "step": 12201 + }, + { + "epoch": 0.8247938353386508, + "grad_norm": 0.3083098530769348, + "learning_rate": 2.374552314229585e-06, + "loss": 0.051448822021484375, + "step": 12202 + }, + { + "epoch": 0.8248614303095849, + "grad_norm": 0.4573988914489746, + "learning_rate": 2.3727710353908443e-06, + "loss": 0.0791168212890625, + "step": 12203 + }, + { + "epoch": 0.8249290252805191, + "grad_norm": 0.8519739508628845, + "learning_rate": 2.370990367535977e-06, + "loss": 0.1331796646118164, + "step": 12204 + }, + { + "epoch": 0.8249966202514533, + "grad_norm": 0.7979326844215393, + "learning_rate": 2.3692103107511264e-06, + "loss": 0.105987548828125, + "step": 12205 + }, + { + "epoch": 0.8250642152223875, + "grad_norm": 0.4379539489746094, + "learning_rate": 2.367430865122438e-06, + "loss": 0.0928802490234375, + "step": 12206 + }, + { + "epoch": 0.8251318101933216, + "grad_norm": 0.4394669234752655, + "learning_rate": 2.3656520307359995e-06, + "loss": 0.0753021240234375, + "step": 12207 + }, + { + "epoch": 0.8251994051642558, + "grad_norm": 0.6595166325569153, + "learning_rate": 2.363873807677891e-06, + "loss": 0.10465240478515625, + "step": 12208 + }, + { + "epoch": 0.8252670001351899, + "grad_norm": 0.9223187565803528, + "learning_rate": 2.36209619603415e-06, + "loss": 0.133514404296875, + "step": 12209 + }, + { + "epoch": 0.8253345951061241, + "grad_norm": 0.9003965854644775, + "learning_rate": 2.3603191958907906e-06, + "loss": 0.1602630615234375, + "step": 12210 + }, + { + "epoch": 0.8254021900770583, + "grad_norm": 0.31397396326065063, + "learning_rate": 2.3585428073337927e-06, + "loss": 0.05789947509765625, + "step": 12211 + }, + { + "epoch": 0.8254697850479924, + "grad_norm": 1.2315915822982788, + "learning_rate": 2.3567670304491097e-06, + "loss": 0.199462890625, + "step": 12212 + }, + { + "epoch": 0.8255373800189266, + "grad_norm": 0.732995331287384, + "learning_rate": 2.354991865322667e-06, + "loss": 0.1197662353515625, + "step": 12213 + }, + { + "epoch": 0.8256049749898607, + "grad_norm": 1.108600378036499, + "learning_rate": 2.353217312040355e-06, + "loss": 0.1943359375, + "step": 12214 + }, + { + "epoch": 0.825672569960795, + "grad_norm": 0.482795774936676, + "learning_rate": 2.3514433706880394e-06, + "loss": 0.075347900390625, + "step": 12215 + }, + { + "epoch": 0.8257401649317291, + "grad_norm": 0.8690413236618042, + "learning_rate": 2.34967004135155e-06, + "loss": 0.13315582275390625, + "step": 12216 + }, + { + "epoch": 0.8258077599026632, + "grad_norm": 0.9574484825134277, + "learning_rate": 2.347897324116704e-06, + "loss": 0.1566162109375, + "step": 12217 + }, + { + "epoch": 0.8258753548735974, + "grad_norm": 0.4389653503894806, + "learning_rate": 2.34612521906926e-06, + "loss": 0.0736541748046875, + "step": 12218 + }, + { + "epoch": 0.8259429498445315, + "grad_norm": 0.5916997790336609, + "learning_rate": 2.344353726294975e-06, + "loss": 0.099517822265625, + "step": 12219 + }, + { + "epoch": 0.8260105448154658, + "grad_norm": 0.5082765817642212, + "learning_rate": 2.3425828458795617e-06, + "loss": 0.07222747802734375, + "step": 12220 + }, + { + "epoch": 0.8260781397863999, + "grad_norm": 1.453971266746521, + "learning_rate": 2.3408125779087053e-06, + "loss": 0.12105560302734375, + "step": 12221 + }, + { + "epoch": 0.8261457347573341, + "grad_norm": 0.2694612741470337, + "learning_rate": 2.3390429224680637e-06, + "loss": 0.04190826416015625, + "step": 12222 + }, + { + "epoch": 0.8262133297282682, + "grad_norm": 0.6735344529151917, + "learning_rate": 2.3372738796432633e-06, + "loss": 0.12371826171875, + "step": 12223 + }, + { + "epoch": 0.8262809246992023, + "grad_norm": 0.4642373323440552, + "learning_rate": 2.3355054495198996e-06, + "loss": 0.0797576904296875, + "step": 12224 + }, + { + "epoch": 0.8263485196701366, + "grad_norm": 0.6302504539489746, + "learning_rate": 2.3337376321835435e-06, + "loss": 0.11553955078125, + "step": 12225 + }, + { + "epoch": 0.8264161146410707, + "grad_norm": 1.0958707332611084, + "learning_rate": 2.3319704277197318e-06, + "loss": 0.1298828125, + "step": 12226 + }, + { + "epoch": 0.8264837096120049, + "grad_norm": 0.7594574689865112, + "learning_rate": 2.330203836213971e-06, + "loss": 0.09769439697265625, + "step": 12227 + }, + { + "epoch": 0.826551304582939, + "grad_norm": 0.22945216298103333, + "learning_rate": 2.328437857751743e-06, + "loss": 0.043773651123046875, + "step": 12228 + }, + { + "epoch": 0.8266188995538732, + "grad_norm": 0.26774847507476807, + "learning_rate": 2.326672492418489e-06, + "loss": 0.03034210205078125, + "step": 12229 + }, + { + "epoch": 0.8266864945248074, + "grad_norm": 0.8991259932518005, + "learning_rate": 2.3249077402996443e-06, + "loss": 0.0946502685546875, + "step": 12230 + }, + { + "epoch": 0.8267540894957415, + "grad_norm": 0.38008856773376465, + "learning_rate": 2.323143601480579e-06, + "loss": 0.0662994384765625, + "step": 12231 + }, + { + "epoch": 0.8268216844666757, + "grad_norm": 0.19115440547466278, + "learning_rate": 2.321380076046668e-06, + "loss": 0.024806976318359375, + "step": 12232 + }, + { + "epoch": 0.8268892794376098, + "grad_norm": 0.3845231533050537, + "learning_rate": 2.3196171640832364e-06, + "loss": 0.06983184814453125, + "step": 12233 + }, + { + "epoch": 0.826956874408544, + "grad_norm": 0.5052611827850342, + "learning_rate": 2.3178548656755845e-06, + "loss": 0.07563400268554688, + "step": 12234 + }, + { + "epoch": 0.8270244693794782, + "grad_norm": 1.0369867086410522, + "learning_rate": 2.3160931809089836e-06, + "loss": 0.12707901000976562, + "step": 12235 + }, + { + "epoch": 0.8270920643504124, + "grad_norm": 1.3282673358917236, + "learning_rate": 2.314332109868674e-06, + "loss": 0.188232421875, + "step": 12236 + }, + { + "epoch": 0.8271596593213465, + "grad_norm": 0.8018513321876526, + "learning_rate": 2.31257165263987e-06, + "loss": 0.1361083984375, + "step": 12237 + }, + { + "epoch": 0.8272272542922806, + "grad_norm": 0.5657296776771545, + "learning_rate": 2.3108118093077507e-06, + "loss": 0.126922607421875, + "step": 12238 + }, + { + "epoch": 0.8272948492632148, + "grad_norm": 1.5306899547576904, + "learning_rate": 2.3090525799574683e-06, + "loss": 0.200775146484375, + "step": 12239 + }, + { + "epoch": 0.827362444234149, + "grad_norm": 0.5420700311660767, + "learning_rate": 2.3072939646741485e-06, + "loss": 0.09445953369140625, + "step": 12240 + }, + { + "epoch": 0.8274300392050832, + "grad_norm": 0.4630376398563385, + "learning_rate": 2.30553596354288e-06, + "loss": 0.062225341796875, + "step": 12241 + }, + { + "epoch": 0.8274976341760173, + "grad_norm": 0.46590158343315125, + "learning_rate": 2.3037785766487252e-06, + "loss": 0.07839202880859375, + "step": 12242 + }, + { + "epoch": 0.8275652291469515, + "grad_norm": 0.8162981271743774, + "learning_rate": 2.3020218040767256e-06, + "loss": 0.1414337158203125, + "step": 12243 + }, + { + "epoch": 0.8276328241178856, + "grad_norm": 0.4503256678581238, + "learning_rate": 2.3002656459118725e-06, + "loss": 0.06928253173828125, + "step": 12244 + }, + { + "epoch": 0.8277004190888197, + "grad_norm": 0.7952191829681396, + "learning_rate": 2.29851010223915e-06, + "loss": 0.139556884765625, + "step": 12245 + }, + { + "epoch": 0.827768014059754, + "grad_norm": 0.34999188780784607, + "learning_rate": 2.296755173143499e-06, + "loss": 0.051937103271484375, + "step": 12246 + }, + { + "epoch": 0.8278356090306881, + "grad_norm": 0.9593010544776917, + "learning_rate": 2.295000858709832e-06, + "loss": 0.0940399169921875, + "step": 12247 + }, + { + "epoch": 0.8279032040016223, + "grad_norm": 0.521260142326355, + "learning_rate": 2.2932471590230367e-06, + "loss": 0.0894317626953125, + "step": 12248 + }, + { + "epoch": 0.8279707989725564, + "grad_norm": 0.8879358172416687, + "learning_rate": 2.2914940741679657e-06, + "loss": 0.119049072265625, + "step": 12249 + }, + { + "epoch": 0.8280383939434907, + "grad_norm": 1.1896106004714966, + "learning_rate": 2.289741604229444e-06, + "loss": 0.1569366455078125, + "step": 12250 + }, + { + "epoch": 0.8281059889144248, + "grad_norm": 0.5320154428482056, + "learning_rate": 2.2879897492922686e-06, + "loss": 0.07635498046875, + "step": 12251 + }, + { + "epoch": 0.8281735838853589, + "grad_norm": 1.0837007761001587, + "learning_rate": 2.2862385094412034e-06, + "loss": 0.159027099609375, + "step": 12252 + }, + { + "epoch": 0.8282411788562931, + "grad_norm": 0.6872380971908569, + "learning_rate": 2.2844878847609862e-06, + "loss": 0.131072998046875, + "step": 12253 + }, + { + "epoch": 0.8283087738272272, + "grad_norm": 1.1925727128982544, + "learning_rate": 2.2827378753363204e-06, + "loss": 0.12296295166015625, + "step": 12254 + }, + { + "epoch": 0.8283763687981615, + "grad_norm": 0.5885637402534485, + "learning_rate": 2.2809884812518816e-06, + "loss": 0.109649658203125, + "step": 12255 + }, + { + "epoch": 0.8284439637690956, + "grad_norm": 0.7765867114067078, + "learning_rate": 2.2792397025923254e-06, + "loss": 0.147918701171875, + "step": 12256 + }, + { + "epoch": 0.8285115587400298, + "grad_norm": 0.5511977672576904, + "learning_rate": 2.2774915394422564e-06, + "loss": 0.0909576416015625, + "step": 12257 + }, + { + "epoch": 0.8285791537109639, + "grad_norm": 0.5480717420578003, + "learning_rate": 2.2757439918862693e-06, + "loss": 0.07622528076171875, + "step": 12258 + }, + { + "epoch": 0.828646748681898, + "grad_norm": 0.6890065670013428, + "learning_rate": 2.273997060008924e-06, + "loss": 0.17437744140625, + "step": 12259 + }, + { + "epoch": 0.8287143436528323, + "grad_norm": 1.68294095993042, + "learning_rate": 2.2722507438947336e-06, + "loss": 0.1881866455078125, + "step": 12260 + }, + { + "epoch": 0.8287819386237664, + "grad_norm": 0.5586157441139221, + "learning_rate": 2.27050504362821e-06, + "loss": 0.09038543701171875, + "step": 12261 + }, + { + "epoch": 0.8288495335947006, + "grad_norm": 0.35960397124290466, + "learning_rate": 2.268759959293816e-06, + "loss": 0.05137443542480469, + "step": 12262 + }, + { + "epoch": 0.8289171285656347, + "grad_norm": 0.27844586968421936, + "learning_rate": 2.2670154909759905e-06, + "loss": 0.042327880859375, + "step": 12263 + }, + { + "epoch": 0.8289847235365688, + "grad_norm": 0.3007356524467468, + "learning_rate": 2.2652716387591414e-06, + "loss": 0.04175567626953125, + "step": 12264 + }, + { + "epoch": 0.829052318507503, + "grad_norm": 0.3199879825115204, + "learning_rate": 2.263528402727647e-06, + "loss": 0.038928985595703125, + "step": 12265 + }, + { + "epoch": 0.8291199134784372, + "grad_norm": 0.6428213119506836, + "learning_rate": 2.2617857829658513e-06, + "loss": 0.09405517578125, + "step": 12266 + }, + { + "epoch": 0.8291875084493714, + "grad_norm": 2.4082539081573486, + "learning_rate": 2.2600437795580847e-06, + "loss": 0.240264892578125, + "step": 12267 + }, + { + "epoch": 0.8292551034203055, + "grad_norm": 0.3805938959121704, + "learning_rate": 2.258302392588622e-06, + "loss": 0.073822021484375, + "step": 12268 + }, + { + "epoch": 0.8293226983912397, + "grad_norm": 0.9907341003417969, + "learning_rate": 2.2565616221417374e-06, + "loss": 0.121063232421875, + "step": 12269 + }, + { + "epoch": 0.8293902933621738, + "grad_norm": 0.34932440519332886, + "learning_rate": 2.254821468301643e-06, + "loss": 0.07709503173828125, + "step": 12270 + }, + { + "epoch": 0.829457888333108, + "grad_norm": 1.2373361587524414, + "learning_rate": 2.2530819311525526e-06, + "loss": 0.14162063598632812, + "step": 12271 + }, + { + "epoch": 0.8295254833040422, + "grad_norm": 0.2710000276565552, + "learning_rate": 2.251343010778634e-06, + "loss": 0.03485870361328125, + "step": 12272 + }, + { + "epoch": 0.8295930782749763, + "grad_norm": 0.5733964443206787, + "learning_rate": 2.249604707264016e-06, + "loss": 0.09029388427734375, + "step": 12273 + }, + { + "epoch": 0.8296606732459105, + "grad_norm": 0.8581568002700806, + "learning_rate": 2.24786702069282e-06, + "loss": 0.176605224609375, + "step": 12274 + }, + { + "epoch": 0.8297282682168446, + "grad_norm": 0.5739345550537109, + "learning_rate": 2.246129951149121e-06, + "loss": 0.09088134765625, + "step": 12275 + }, + { + "epoch": 0.8297958631877789, + "grad_norm": 0.8410003185272217, + "learning_rate": 2.244393498716972e-06, + "loss": 0.204833984375, + "step": 12276 + }, + { + "epoch": 0.829863458158713, + "grad_norm": 0.34039998054504395, + "learning_rate": 2.242657663480391e-06, + "loss": 0.0367584228515625, + "step": 12277 + }, + { + "epoch": 0.8299310531296471, + "grad_norm": 0.7953721284866333, + "learning_rate": 2.240922445523369e-06, + "loss": 0.153900146484375, + "step": 12278 + }, + { + "epoch": 0.8299986481005813, + "grad_norm": 1.4059659242630005, + "learning_rate": 2.2391878449298643e-06, + "loss": 0.2192840576171875, + "step": 12279 + }, + { + "epoch": 0.8300662430715154, + "grad_norm": 1.2027666568756104, + "learning_rate": 2.2374538617838176e-06, + "loss": 0.17934417724609375, + "step": 12280 + }, + { + "epoch": 0.8301338380424497, + "grad_norm": 0.3657384216785431, + "learning_rate": 2.2357204961691134e-06, + "loss": 0.05806732177734375, + "step": 12281 + }, + { + "epoch": 0.8302014330133838, + "grad_norm": 0.7091779708862305, + "learning_rate": 2.23398774816964e-06, + "loss": 0.1373291015625, + "step": 12282 + }, + { + "epoch": 0.830269027984318, + "grad_norm": 0.24669213593006134, + "learning_rate": 2.2322556178692274e-06, + "loss": 0.037036895751953125, + "step": 12283 + }, + { + "epoch": 0.8303366229552521, + "grad_norm": 0.42348217964172363, + "learning_rate": 2.2305241053516857e-06, + "loss": 0.0587615966796875, + "step": 12284 + }, + { + "epoch": 0.8304042179261862, + "grad_norm": 0.5869652032852173, + "learning_rate": 2.2287932107008087e-06, + "loss": 0.10710906982421875, + "step": 12285 + }, + { + "epoch": 0.8304718128971205, + "grad_norm": 0.6630932092666626, + "learning_rate": 2.2270629340003306e-06, + "loss": 0.12253570556640625, + "step": 12286 + }, + { + "epoch": 0.8305394078680546, + "grad_norm": 0.48475393652915955, + "learning_rate": 2.2253332753339866e-06, + "loss": 0.08730888366699219, + "step": 12287 + }, + { + "epoch": 0.8306070028389888, + "grad_norm": 0.738655149936676, + "learning_rate": 2.223604234785463e-06, + "loss": 0.11272430419921875, + "step": 12288 + }, + { + "epoch": 0.8306745978099229, + "grad_norm": 0.8838648796081543, + "learning_rate": 2.221875812438421e-06, + "loss": 0.15419769287109375, + "step": 12289 + }, + { + "epoch": 0.8307421927808571, + "grad_norm": 0.27410927414894104, + "learning_rate": 2.2201480083764954e-06, + "loss": 0.046230316162109375, + "step": 12290 + }, + { + "epoch": 0.8308097877517913, + "grad_norm": 0.4804632365703583, + "learning_rate": 2.2184208226832843e-06, + "loss": 0.0948028564453125, + "step": 12291 + }, + { + "epoch": 0.8308773827227254, + "grad_norm": 0.39119115471839905, + "learning_rate": 2.216694255442359e-06, + "loss": 0.08386993408203125, + "step": 12292 + }, + { + "epoch": 0.8309449776936596, + "grad_norm": 0.658527672290802, + "learning_rate": 2.214968306737272e-06, + "loss": 0.1018829345703125, + "step": 12293 + }, + { + "epoch": 0.8310125726645937, + "grad_norm": 0.914338231086731, + "learning_rate": 2.2132429766515232e-06, + "loss": 0.11143112182617188, + "step": 12294 + }, + { + "epoch": 0.8310801676355279, + "grad_norm": 1.9442415237426758, + "learning_rate": 2.2115182652685954e-06, + "loss": 0.17955780029296875, + "step": 12295 + }, + { + "epoch": 0.8311477626064621, + "grad_norm": 0.33879125118255615, + "learning_rate": 2.2097941726719517e-06, + "loss": 0.0473480224609375, + "step": 12296 + }, + { + "epoch": 0.8312153575773963, + "grad_norm": 0.4051454961299896, + "learning_rate": 2.208070698945e-06, + "loss": 0.06771087646484375, + "step": 12297 + }, + { + "epoch": 0.8312829525483304, + "grad_norm": 0.3904553949832916, + "learning_rate": 2.206347844171149e-06, + "loss": 0.0661163330078125, + "step": 12298 + }, + { + "epoch": 0.8313505475192645, + "grad_norm": 0.2666597068309784, + "learning_rate": 2.204625608433743e-06, + "loss": 0.035327911376953125, + "step": 12299 + }, + { + "epoch": 0.8314181424901987, + "grad_norm": 0.28230008482933044, + "learning_rate": 2.2029039918161276e-06, + "loss": 0.05086517333984375, + "step": 12300 + }, + { + "epoch": 0.8314857374611329, + "grad_norm": 1.1125181913375854, + "learning_rate": 2.2011829944016026e-06, + "loss": 0.166778564453125, + "step": 12301 + }, + { + "epoch": 0.8315533324320671, + "grad_norm": 0.9980229139328003, + "learning_rate": 2.199462616273438e-06, + "loss": 0.17657470703125, + "step": 12302 + }, + { + "epoch": 0.8316209274030012, + "grad_norm": 0.6643760204315186, + "learning_rate": 2.197742857514878e-06, + "loss": 0.097015380859375, + "step": 12303 + }, + { + "epoch": 0.8316885223739354, + "grad_norm": 1.1000394821166992, + "learning_rate": 2.1960237182091346e-06, + "loss": 0.16684341430664062, + "step": 12304 + }, + { + "epoch": 0.8317561173448695, + "grad_norm": 0.6643982529640198, + "learning_rate": 2.1943051984393894e-06, + "loss": 0.10958099365234375, + "step": 12305 + }, + { + "epoch": 0.8318237123158037, + "grad_norm": 0.6636265516281128, + "learning_rate": 2.1925872982888012e-06, + "loss": 0.162872314453125, + "step": 12306 + }, + { + "epoch": 0.8318913072867379, + "grad_norm": 0.3595871031284332, + "learning_rate": 2.1908700178404846e-06, + "loss": 0.07421875, + "step": 12307 + }, + { + "epoch": 0.831958902257672, + "grad_norm": 1.0146533250808716, + "learning_rate": 2.189153357177534e-06, + "loss": 0.1727294921875, + "step": 12308 + }, + { + "epoch": 0.8320264972286062, + "grad_norm": 1.0881143808364868, + "learning_rate": 2.1874373163830184e-06, + "loss": 0.1400909423828125, + "step": 12309 + }, + { + "epoch": 0.8320940921995403, + "grad_norm": 0.7120606303215027, + "learning_rate": 2.18572189553996e-06, + "loss": 0.1064300537109375, + "step": 12310 + }, + { + "epoch": 0.8321616871704746, + "grad_norm": 0.2907428443431854, + "learning_rate": 2.1840070947313717e-06, + "loss": 0.038646697998046875, + "step": 12311 + }, + { + "epoch": 0.8322292821414087, + "grad_norm": 0.34008100628852844, + "learning_rate": 2.182292914040221e-06, + "loss": 0.049747467041015625, + "step": 12312 + }, + { + "epoch": 0.8322968771123428, + "grad_norm": 0.8962854743003845, + "learning_rate": 2.180579353549451e-06, + "loss": 0.11660385131835938, + "step": 12313 + }, + { + "epoch": 0.832364472083277, + "grad_norm": 0.7853034734725952, + "learning_rate": 2.178866413341976e-06, + "loss": 0.126800537109375, + "step": 12314 + }, + { + "epoch": 0.8324320670542111, + "grad_norm": 0.6704707741737366, + "learning_rate": 2.1771540935006777e-06, + "loss": 0.09830093383789062, + "step": 12315 + }, + { + "epoch": 0.8324996620251454, + "grad_norm": 0.9418291449546814, + "learning_rate": 2.1754423941084086e-06, + "loss": 0.1104888916015625, + "step": 12316 + }, + { + "epoch": 0.8325672569960795, + "grad_norm": 0.422469824552536, + "learning_rate": 2.1737313152479916e-06, + "loss": 0.07468414306640625, + "step": 12317 + }, + { + "epoch": 0.8326348519670137, + "grad_norm": 0.4200880825519562, + "learning_rate": 2.172020857002219e-06, + "loss": 0.05225181579589844, + "step": 12318 + }, + { + "epoch": 0.8327024469379478, + "grad_norm": 0.44791164994239807, + "learning_rate": 2.170311019453854e-06, + "loss": 0.0952911376953125, + "step": 12319 + }, + { + "epoch": 0.8327700419088819, + "grad_norm": 0.31166133284568787, + "learning_rate": 2.1686018026856273e-06, + "loss": 0.030162811279296875, + "step": 12320 + }, + { + "epoch": 0.8328376368798162, + "grad_norm": 0.8336947560310364, + "learning_rate": 2.1668932067802424e-06, + "loss": 0.08490753173828125, + "step": 12321 + }, + { + "epoch": 0.8329052318507503, + "grad_norm": 0.29267072677612305, + "learning_rate": 2.1651852318203768e-06, + "loss": 0.04895782470703125, + "step": 12322 + }, + { + "epoch": 0.8329728268216845, + "grad_norm": 0.2952665090560913, + "learning_rate": 2.163477877888664e-06, + "loss": 0.06288909912109375, + "step": 12323 + }, + { + "epoch": 0.8330404217926186, + "grad_norm": 0.6097967028617859, + "learning_rate": 2.161771145067722e-06, + "loss": 0.1158905029296875, + "step": 12324 + }, + { + "epoch": 0.8331080167635528, + "grad_norm": 0.3805530369281769, + "learning_rate": 2.1600650334401335e-06, + "loss": 0.055118560791015625, + "step": 12325 + }, + { + "epoch": 0.833175611734487, + "grad_norm": 0.409146249294281, + "learning_rate": 2.158359543088449e-06, + "loss": 0.04064369201660156, + "step": 12326 + }, + { + "epoch": 0.8332432067054211, + "grad_norm": 0.38595980405807495, + "learning_rate": 2.156654674095191e-06, + "loss": 0.07859039306640625, + "step": 12327 + }, + { + "epoch": 0.8333108016763553, + "grad_norm": 0.3592469394207001, + "learning_rate": 2.1549504265428516e-06, + "loss": 0.0878448486328125, + "step": 12328 + }, + { + "epoch": 0.8333783966472894, + "grad_norm": 0.232466459274292, + "learning_rate": 2.1532468005138935e-06, + "loss": 0.0450897216796875, + "step": 12329 + }, + { + "epoch": 0.8334459916182236, + "grad_norm": 0.793074369430542, + "learning_rate": 2.1515437960907487e-06, + "loss": 0.1243438720703125, + "step": 12330 + }, + { + "epoch": 0.8335135865891578, + "grad_norm": 0.9045864343643188, + "learning_rate": 2.149841413355818e-06, + "loss": 0.10774993896484375, + "step": 12331 + }, + { + "epoch": 0.833581181560092, + "grad_norm": 0.5784235000610352, + "learning_rate": 2.148139652391474e-06, + "loss": 0.12298583984375, + "step": 12332 + }, + { + "epoch": 0.8336487765310261, + "grad_norm": 0.7687890529632568, + "learning_rate": 2.146438513280058e-06, + "loss": 0.12094497680664062, + "step": 12333 + }, + { + "epoch": 0.8337163715019602, + "grad_norm": 0.1782209426164627, + "learning_rate": 2.14473799610388e-06, + "loss": 0.015542030334472656, + "step": 12334 + }, + { + "epoch": 0.8337839664728944, + "grad_norm": 1.4311021566390991, + "learning_rate": 2.143038100945231e-06, + "loss": 0.1876220703125, + "step": 12335 + }, + { + "epoch": 0.8338515614438285, + "grad_norm": 0.9882516860961914, + "learning_rate": 2.141338827886347e-06, + "loss": 0.184112548828125, + "step": 12336 + }, + { + "epoch": 0.8339191564147628, + "grad_norm": 0.1664721816778183, + "learning_rate": 2.139640177009461e-06, + "loss": 0.016368865966796875, + "step": 12337 + }, + { + "epoch": 0.8339867513856969, + "grad_norm": 0.2566864788532257, + "learning_rate": 2.1379421483967616e-06, + "loss": 0.036525726318359375, + "step": 12338 + }, + { + "epoch": 0.8340543463566311, + "grad_norm": 0.2032930999994278, + "learning_rate": 2.13624474213041e-06, + "loss": 0.02140045166015625, + "step": 12339 + }, + { + "epoch": 0.8341219413275652, + "grad_norm": 0.6037617921829224, + "learning_rate": 2.1345479582925353e-06, + "loss": 0.0872344970703125, + "step": 12340 + }, + { + "epoch": 0.8341895362984993, + "grad_norm": 0.5806068181991577, + "learning_rate": 2.1328517969652406e-06, + "loss": 0.08966064453125, + "step": 12341 + }, + { + "epoch": 0.8342571312694336, + "grad_norm": 0.9919243454933167, + "learning_rate": 2.131156258230595e-06, + "loss": 0.14947509765625, + "step": 12342 + }, + { + "epoch": 0.8343247262403677, + "grad_norm": 1.0676121711730957, + "learning_rate": 2.129461342170641e-06, + "loss": 0.2105712890625, + "step": 12343 + }, + { + "epoch": 0.8343923212113019, + "grad_norm": 0.3390708863735199, + "learning_rate": 2.1277670488673885e-06, + "loss": 0.06758880615234375, + "step": 12344 + }, + { + "epoch": 0.834459916182236, + "grad_norm": 0.9517495036125183, + "learning_rate": 2.1260733784028163e-06, + "loss": 0.183074951171875, + "step": 12345 + }, + { + "epoch": 0.8345275111531703, + "grad_norm": 0.4947414994239807, + "learning_rate": 2.124380330858878e-06, + "loss": 0.08148574829101562, + "step": 12346 + }, + { + "epoch": 0.8345951061241044, + "grad_norm": 0.26516109704971313, + "learning_rate": 2.122687906317487e-06, + "loss": 0.038338661193847656, + "step": 12347 + }, + { + "epoch": 0.8346627010950385, + "grad_norm": 0.5404166579246521, + "learning_rate": 2.120996104860545e-06, + "loss": 0.10482025146484375, + "step": 12348 + }, + { + "epoch": 0.8347302960659727, + "grad_norm": 0.7355604767799377, + "learning_rate": 2.1193049265698986e-06, + "loss": 0.130401611328125, + "step": 12349 + }, + { + "epoch": 0.8347978910369068, + "grad_norm": 0.28991734981536865, + "learning_rate": 2.117614371527389e-06, + "loss": 0.054290771484375, + "step": 12350 + }, + { + "epoch": 0.834865486007841, + "grad_norm": 0.9513311386108398, + "learning_rate": 2.11592443981481e-06, + "loss": 0.156402587890625, + "step": 12351 + }, + { + "epoch": 0.8349330809787752, + "grad_norm": 0.6962950229644775, + "learning_rate": 2.1142351315139314e-06, + "loss": 0.11573028564453125, + "step": 12352 + }, + { + "epoch": 0.8350006759497094, + "grad_norm": 0.6424234509468079, + "learning_rate": 2.1125464467064914e-06, + "loss": 0.10445404052734375, + "step": 12353 + }, + { + "epoch": 0.8350682709206435, + "grad_norm": 0.4304334223270416, + "learning_rate": 2.110858385474203e-06, + "loss": 0.079254150390625, + "step": 12354 + }, + { + "epoch": 0.8351358658915776, + "grad_norm": 1.2363290786743164, + "learning_rate": 2.1091709478987407e-06, + "loss": 0.166900634765625, + "step": 12355 + }, + { + "epoch": 0.8352034608625118, + "grad_norm": 0.5367790460586548, + "learning_rate": 2.1074841340617563e-06, + "loss": 0.0906982421875, + "step": 12356 + }, + { + "epoch": 0.835271055833446, + "grad_norm": 0.4313432276248932, + "learning_rate": 2.1057979440448673e-06, + "loss": 0.053501129150390625, + "step": 12357 + }, + { + "epoch": 0.8353386508043802, + "grad_norm": 2.11079478263855, + "learning_rate": 2.1041123779296583e-06, + "loss": 0.2134552001953125, + "step": 12358 + }, + { + "epoch": 0.8354062457753143, + "grad_norm": 0.5269497036933899, + "learning_rate": 2.102427435797698e-06, + "loss": 0.101654052734375, + "step": 12359 + }, + { + "epoch": 0.8354738407462485, + "grad_norm": 0.19493785500526428, + "learning_rate": 2.1007431177305008e-06, + "loss": 0.022144317626953125, + "step": 12360 + }, + { + "epoch": 0.8355414357171826, + "grad_norm": 0.47268247604370117, + "learning_rate": 2.0990594238095765e-06, + "loss": 0.09482192993164062, + "step": 12361 + }, + { + "epoch": 0.8356090306881168, + "grad_norm": 0.40084683895111084, + "learning_rate": 2.097376354116382e-06, + "loss": 0.03994178771972656, + "step": 12362 + }, + { + "epoch": 0.835676625659051, + "grad_norm": 0.34304407238960266, + "learning_rate": 2.0956939087323634e-06, + "loss": 0.0448455810546875, + "step": 12363 + }, + { + "epoch": 0.8357442206299851, + "grad_norm": 0.28780195116996765, + "learning_rate": 2.094012087738924e-06, + "loss": 0.0535888671875, + "step": 12364 + }, + { + "epoch": 0.8358118156009193, + "grad_norm": 1.3412834405899048, + "learning_rate": 2.092330891217442e-06, + "loss": 0.13885116577148438, + "step": 12365 + }, + { + "epoch": 0.8358794105718534, + "grad_norm": 0.3800259530544281, + "learning_rate": 2.0906503192492628e-06, + "loss": 0.076507568359375, + "step": 12366 + }, + { + "epoch": 0.8359470055427877, + "grad_norm": 0.7366593480110168, + "learning_rate": 2.088970371915704e-06, + "loss": 0.135894775390625, + "step": 12367 + }, + { + "epoch": 0.8360146005137218, + "grad_norm": 0.4605947732925415, + "learning_rate": 2.0872910492980505e-06, + "loss": 0.0826416015625, + "step": 12368 + }, + { + "epoch": 0.8360821954846559, + "grad_norm": 0.40328872203826904, + "learning_rate": 2.0856123514775597e-06, + "loss": 0.05419921875, + "step": 12369 + }, + { + "epoch": 0.8361497904555901, + "grad_norm": 0.9848887324333191, + "learning_rate": 2.0839342785354583e-06, + "loss": 0.175323486328125, + "step": 12370 + }, + { + "epoch": 0.8362173854265242, + "grad_norm": 0.7092363834381104, + "learning_rate": 2.0822568305529353e-06, + "loss": 0.156707763671875, + "step": 12371 + }, + { + "epoch": 0.8362849803974585, + "grad_norm": 0.5092290639877319, + "learning_rate": 2.0805800076111703e-06, + "loss": 0.0687713623046875, + "step": 12372 + }, + { + "epoch": 0.8363525753683926, + "grad_norm": 0.3640978932380676, + "learning_rate": 2.078903809791281e-06, + "loss": 0.05279541015625, + "step": 12373 + }, + { + "epoch": 0.8364201703393268, + "grad_norm": 0.6662883162498474, + "learning_rate": 2.0772282371743876e-06, + "loss": 0.145904541015625, + "step": 12374 + }, + { + "epoch": 0.8364877653102609, + "grad_norm": 0.6092257499694824, + "learning_rate": 2.0755532898415526e-06, + "loss": 0.10324859619140625, + "step": 12375 + }, + { + "epoch": 0.836555360281195, + "grad_norm": 0.7887485027313232, + "learning_rate": 2.0738789678738286e-06, + "loss": 0.12560272216796875, + "step": 12376 + }, + { + "epoch": 0.8366229552521293, + "grad_norm": 1.0227596759796143, + "learning_rate": 2.0722052713522293e-06, + "loss": 0.15421485900878906, + "step": 12377 + }, + { + "epoch": 0.8366905502230634, + "grad_norm": 0.5304071307182312, + "learning_rate": 2.0705322003577302e-06, + "loss": 0.07808494567871094, + "step": 12378 + }, + { + "epoch": 0.8367581451939976, + "grad_norm": 0.47753632068634033, + "learning_rate": 2.0688597549712932e-06, + "loss": 0.06526947021484375, + "step": 12379 + }, + { + "epoch": 0.8368257401649317, + "grad_norm": 0.23872016370296478, + "learning_rate": 2.0671879352738405e-06, + "loss": 0.035747528076171875, + "step": 12380 + }, + { + "epoch": 0.836893335135866, + "grad_norm": 1.57858145236969, + "learning_rate": 2.0655167413462633e-06, + "loss": 0.18691253662109375, + "step": 12381 + }, + { + "epoch": 0.8369609301068001, + "grad_norm": 0.8440356850624084, + "learning_rate": 2.063846173269424e-06, + "loss": 0.12722015380859375, + "step": 12382 + }, + { + "epoch": 0.8370285250777342, + "grad_norm": 0.4483441114425659, + "learning_rate": 2.0621762311241566e-06, + "loss": 0.054454803466796875, + "step": 12383 + }, + { + "epoch": 0.8370961200486684, + "grad_norm": 0.4229510724544525, + "learning_rate": 2.0605069149912593e-06, + "loss": 0.099884033203125, + "step": 12384 + }, + { + "epoch": 0.8371637150196025, + "grad_norm": 1.8089755773544312, + "learning_rate": 2.0588382249515143e-06, + "loss": 0.20742034912109375, + "step": 12385 + }, + { + "epoch": 0.8372313099905367, + "grad_norm": 0.18791945278644562, + "learning_rate": 2.0571701610856486e-06, + "loss": 0.02402496337890625, + "step": 12386 + }, + { + "epoch": 0.8372989049614709, + "grad_norm": 0.5997940301895142, + "learning_rate": 2.0555027234743845e-06, + "loss": 0.142425537109375, + "step": 12387 + }, + { + "epoch": 0.8373664999324051, + "grad_norm": 0.5275073051452637, + "learning_rate": 2.053835912198404e-06, + "loss": 0.10292816162109375, + "step": 12388 + }, + { + "epoch": 0.8374340949033392, + "grad_norm": 1.300729513168335, + "learning_rate": 2.052169727338346e-06, + "loss": 0.203521728515625, + "step": 12389 + }, + { + "epoch": 0.8375016898742733, + "grad_norm": 0.6421422958374023, + "learning_rate": 2.050504168974846e-06, + "loss": 0.1042633056640625, + "step": 12390 + }, + { + "epoch": 0.8375692848452075, + "grad_norm": 1.660780906677246, + "learning_rate": 2.048839237188478e-06, + "loss": 0.201568603515625, + "step": 12391 + }, + { + "epoch": 0.8376368798161417, + "grad_norm": 0.570635199546814, + "learning_rate": 2.047174932059814e-06, + "loss": 0.0978240966796875, + "step": 12392 + }, + { + "epoch": 0.8377044747870759, + "grad_norm": 0.5503248572349548, + "learning_rate": 2.0455112536693794e-06, + "loss": 0.0883331298828125, + "step": 12393 + }, + { + "epoch": 0.83777206975801, + "grad_norm": 0.38728126883506775, + "learning_rate": 2.043848202097675e-06, + "loss": 0.05673980712890625, + "step": 12394 + }, + { + "epoch": 0.8378396647289441, + "grad_norm": 1.3398998975753784, + "learning_rate": 2.0421857774251666e-06, + "loss": 0.193450927734375, + "step": 12395 + }, + { + "epoch": 0.8379072596998783, + "grad_norm": 0.40028029680252075, + "learning_rate": 2.0405239797322935e-06, + "loss": 0.06679916381835938, + "step": 12396 + }, + { + "epoch": 0.8379748546708125, + "grad_norm": 0.2480478435754776, + "learning_rate": 2.038862809099462e-06, + "loss": 0.03809356689453125, + "step": 12397 + }, + { + "epoch": 0.8380424496417467, + "grad_norm": 0.7269764542579651, + "learning_rate": 2.037202265607059e-06, + "loss": 0.1354522705078125, + "step": 12398 + }, + { + "epoch": 0.8381100446126808, + "grad_norm": 0.575331449508667, + "learning_rate": 2.0355423493354174e-06, + "loss": 0.1531982421875, + "step": 12399 + }, + { + "epoch": 0.838177639583615, + "grad_norm": 0.33198854327201843, + "learning_rate": 2.033883060364867e-06, + "loss": 0.05843353271484375, + "step": 12400 + }, + { + "epoch": 0.8382452345545491, + "grad_norm": 1.2610383033752441, + "learning_rate": 2.032224398775692e-06, + "loss": 0.1952667236328125, + "step": 12401 + }, + { + "epoch": 0.8383128295254832, + "grad_norm": 0.314778596162796, + "learning_rate": 2.03056636464814e-06, + "loss": 0.046672821044921875, + "step": 12402 + }, + { + "epoch": 0.8383804244964175, + "grad_norm": 0.7542091012001038, + "learning_rate": 2.028908958062446e-06, + "loss": 0.131072998046875, + "step": 12403 + }, + { + "epoch": 0.8384480194673516, + "grad_norm": 0.37073349952697754, + "learning_rate": 2.027252179098803e-06, + "loss": 0.06565093994140625, + "step": 12404 + }, + { + "epoch": 0.8385156144382858, + "grad_norm": 0.9139230847358704, + "learning_rate": 2.025596027837377e-06, + "loss": 0.12606048583984375, + "step": 12405 + }, + { + "epoch": 0.8385832094092199, + "grad_norm": 0.479897141456604, + "learning_rate": 2.023940504358302e-06, + "loss": 0.079681396484375, + "step": 12406 + }, + { + "epoch": 0.8386508043801542, + "grad_norm": 0.8352175951004028, + "learning_rate": 2.0222856087416824e-06, + "loss": 0.102264404296875, + "step": 12407 + }, + { + "epoch": 0.8387183993510883, + "grad_norm": 0.6601513028144836, + "learning_rate": 2.0206313410675926e-06, + "loss": 0.11052703857421875, + "step": 12408 + }, + { + "epoch": 0.8387859943220224, + "grad_norm": 0.6968417167663574, + "learning_rate": 2.018977701416075e-06, + "loss": 0.11933517456054688, + "step": 12409 + }, + { + "epoch": 0.8388535892929566, + "grad_norm": 0.6600892543792725, + "learning_rate": 2.017324689867142e-06, + "loss": 0.1327667236328125, + "step": 12410 + }, + { + "epoch": 0.8389211842638907, + "grad_norm": 0.2911466956138611, + "learning_rate": 2.015672306500787e-06, + "loss": 0.06005096435546875, + "step": 12411 + }, + { + "epoch": 0.838988779234825, + "grad_norm": 1.057274341583252, + "learning_rate": 2.014020551396949e-06, + "loss": 0.1216278076171875, + "step": 12412 + }, + { + "epoch": 0.8390563742057591, + "grad_norm": 1.1332215070724487, + "learning_rate": 2.012369424635554e-06, + "loss": 0.216644287109375, + "step": 12413 + }, + { + "epoch": 0.8391239691766933, + "grad_norm": 0.3849017322063446, + "learning_rate": 2.0107189262965013e-06, + "loss": 0.065399169921875, + "step": 12414 + }, + { + "epoch": 0.8391915641476274, + "grad_norm": 0.5388394594192505, + "learning_rate": 2.0090690564596394e-06, + "loss": 0.10888671875, + "step": 12415 + }, + { + "epoch": 0.8392591591185615, + "grad_norm": 1.001484751701355, + "learning_rate": 2.00741981520481e-06, + "loss": 0.1288299560546875, + "step": 12416 + }, + { + "epoch": 0.8393267540894958, + "grad_norm": 0.9241666793823242, + "learning_rate": 2.00577120261181e-06, + "loss": 0.222076416015625, + "step": 12417 + }, + { + "epoch": 0.8393943490604299, + "grad_norm": 0.6739726066589355, + "learning_rate": 2.004123218760411e-06, + "loss": 0.1297454833984375, + "step": 12418 + }, + { + "epoch": 0.8394619440313641, + "grad_norm": 0.6818175911903381, + "learning_rate": 2.002475863730348e-06, + "loss": 0.1173095703125, + "step": 12419 + }, + { + "epoch": 0.8395295390022982, + "grad_norm": 0.4254690706729889, + "learning_rate": 2.0008291376013367e-06, + "loss": 0.08045196533203125, + "step": 12420 + }, + { + "epoch": 0.8395971339732324, + "grad_norm": 1.3765976428985596, + "learning_rate": 1.999183040453051e-06, + "loss": 0.2469482421875, + "step": 12421 + }, + { + "epoch": 0.8396647289441665, + "grad_norm": 0.6376322507858276, + "learning_rate": 1.9975375723651405e-06, + "loss": 0.121429443359375, + "step": 12422 + }, + { + "epoch": 0.8397323239151007, + "grad_norm": 0.7335541844367981, + "learning_rate": 1.995892733417224e-06, + "loss": 0.124053955078125, + "step": 12423 + }, + { + "epoch": 0.8397999188860349, + "grad_norm": 0.2801879048347473, + "learning_rate": 1.9942485236888883e-06, + "loss": 0.05183982849121094, + "step": 12424 + }, + { + "epoch": 0.839867513856969, + "grad_norm": 1.2136763334274292, + "learning_rate": 1.9926049432596927e-06, + "loss": 0.14254379272460938, + "step": 12425 + }, + { + "epoch": 0.8399351088279032, + "grad_norm": 1.1598734855651855, + "learning_rate": 1.9909619922091577e-06, + "loss": 0.1515960693359375, + "step": 12426 + }, + { + "epoch": 0.8400027037988373, + "grad_norm": 1.1077930927276611, + "learning_rate": 1.98931967061679e-06, + "loss": 0.1581573486328125, + "step": 12427 + }, + { + "epoch": 0.8400702987697716, + "grad_norm": 0.28077232837677, + "learning_rate": 1.9876779785620424e-06, + "loss": 0.03610992431640625, + "step": 12428 + }, + { + "epoch": 0.8401378937407057, + "grad_norm": 0.6696039438247681, + "learning_rate": 1.986036916124362e-06, + "loss": 0.12804412841796875, + "step": 12429 + }, + { + "epoch": 0.8402054887116398, + "grad_norm": 1.457324743270874, + "learning_rate": 1.984396483383148e-06, + "loss": 0.204559326171875, + "step": 12430 + }, + { + "epoch": 0.840273083682574, + "grad_norm": 0.31101667881011963, + "learning_rate": 1.982756680417774e-06, + "loss": 0.05725860595703125, + "step": 12431 + }, + { + "epoch": 0.8403406786535081, + "grad_norm": 0.5283198356628418, + "learning_rate": 1.981117507307586e-06, + "loss": 0.1221466064453125, + "step": 12432 + }, + { + "epoch": 0.8404082736244424, + "grad_norm": 1.3407107591629028, + "learning_rate": 1.979478964131896e-06, + "loss": 0.185760498046875, + "step": 12433 + }, + { + "epoch": 0.8404758685953765, + "grad_norm": 0.40747275948524475, + "learning_rate": 1.9778410509699857e-06, + "loss": 0.0821685791015625, + "step": 12434 + }, + { + "epoch": 0.8405434635663107, + "grad_norm": 0.3410094976425171, + "learning_rate": 1.9762037679011148e-06, + "loss": 0.06948471069335938, + "step": 12435 + }, + { + "epoch": 0.8406110585372448, + "grad_norm": 0.6583990454673767, + "learning_rate": 1.9745671150044964e-06, + "loss": 0.12705230712890625, + "step": 12436 + }, + { + "epoch": 0.8406786535081789, + "grad_norm": 0.5178655385971069, + "learning_rate": 1.9729310923593257e-06, + "loss": 0.08249664306640625, + "step": 12437 + }, + { + "epoch": 0.8407462484791132, + "grad_norm": 0.23249846696853638, + "learning_rate": 1.971295700044763e-06, + "loss": 0.027341842651367188, + "step": 12438 + }, + { + "epoch": 0.8408138434500473, + "grad_norm": 0.24528278410434723, + "learning_rate": 1.9696609381399344e-06, + "loss": 0.03907012939453125, + "step": 12439 + }, + { + "epoch": 0.8408814384209815, + "grad_norm": 0.17214012145996094, + "learning_rate": 1.968026806723952e-06, + "loss": 0.029552459716796875, + "step": 12440 + }, + { + "epoch": 0.8409490333919156, + "grad_norm": 0.9794101119041443, + "learning_rate": 1.966393305875871e-06, + "loss": 0.1472930908203125, + "step": 12441 + }, + { + "epoch": 0.8410166283628498, + "grad_norm": 1.1829557418823242, + "learning_rate": 1.9647604356747413e-06, + "loss": 0.1441802978515625, + "step": 12442 + }, + { + "epoch": 0.841084223333784, + "grad_norm": 0.6959252953529358, + "learning_rate": 1.963128196199566e-06, + "loss": 0.104644775390625, + "step": 12443 + }, + { + "epoch": 0.8411518183047181, + "grad_norm": 0.7325893044471741, + "learning_rate": 1.9614965875293246e-06, + "loss": 0.101776123046875, + "step": 12444 + }, + { + "epoch": 0.8412194132756523, + "grad_norm": 1.561585545539856, + "learning_rate": 1.959865609742963e-06, + "loss": 0.193206787109375, + "step": 12445 + }, + { + "epoch": 0.8412870082465864, + "grad_norm": 1.118530511856079, + "learning_rate": 1.9582352629194e-06, + "loss": 0.145416259765625, + "step": 12446 + }, + { + "epoch": 0.8413546032175206, + "grad_norm": 0.596481204032898, + "learning_rate": 1.9566055471375195e-06, + "loss": 0.09627532958984375, + "step": 12447 + }, + { + "epoch": 0.8414221981884548, + "grad_norm": 0.8653178215026855, + "learning_rate": 1.9549764624761798e-06, + "loss": 0.12755203247070312, + "step": 12448 + }, + { + "epoch": 0.841489793159389, + "grad_norm": 0.9809178113937378, + "learning_rate": 1.9533480090142043e-06, + "loss": 0.1348419189453125, + "step": 12449 + }, + { + "epoch": 0.8415573881303231, + "grad_norm": 0.749564528465271, + "learning_rate": 1.9517201868303854e-06, + "loss": 0.13140869140625, + "step": 12450 + }, + { + "epoch": 0.8416249831012572, + "grad_norm": 1.0225743055343628, + "learning_rate": 1.950092996003498e-06, + "loss": 0.12430572509765625, + "step": 12451 + }, + { + "epoch": 0.8416925780721914, + "grad_norm": 1.452328085899353, + "learning_rate": 1.9484664366122594e-06, + "loss": 0.153350830078125, + "step": 12452 + }, + { + "epoch": 0.8417601730431256, + "grad_norm": 0.8492541313171387, + "learning_rate": 1.9468405087353886e-06, + "loss": 0.145263671875, + "step": 12453 + }, + { + "epoch": 0.8418277680140598, + "grad_norm": 0.52581787109375, + "learning_rate": 1.9452152124515437e-06, + "loss": 0.06397247314453125, + "step": 12454 + }, + { + "epoch": 0.8418953629849939, + "grad_norm": 0.245208278298378, + "learning_rate": 1.9435905478393775e-06, + "loss": 0.025888442993164062, + "step": 12455 + }, + { + "epoch": 0.8419629579559281, + "grad_norm": 1.4792120456695557, + "learning_rate": 1.9419665149774977e-06, + "loss": 0.16207122802734375, + "step": 12456 + }, + { + "epoch": 0.8420305529268622, + "grad_norm": 0.30350521206855774, + "learning_rate": 1.9403431139444844e-06, + "loss": 0.0533447265625, + "step": 12457 + }, + { + "epoch": 0.8420981478977964, + "grad_norm": 0.23480010032653809, + "learning_rate": 1.938720344818889e-06, + "loss": 0.03499412536621094, + "step": 12458 + }, + { + "epoch": 0.8421657428687306, + "grad_norm": 0.6634286046028137, + "learning_rate": 1.9370982076792302e-06, + "loss": 0.12823486328125, + "step": 12459 + }, + { + "epoch": 0.8422333378396647, + "grad_norm": 1.2235883474349976, + "learning_rate": 1.9354767026039975e-06, + "loss": 0.18567657470703125, + "step": 12460 + }, + { + "epoch": 0.8423009328105989, + "grad_norm": 0.33285436034202576, + "learning_rate": 1.9338558296716497e-06, + "loss": 0.060161590576171875, + "step": 12461 + }, + { + "epoch": 0.842368527781533, + "grad_norm": 0.42637184262275696, + "learning_rate": 1.9322355889606135e-06, + "loss": 0.05467987060546875, + "step": 12462 + }, + { + "epoch": 0.8424361227524673, + "grad_norm": 0.7236639857292175, + "learning_rate": 1.9306159805492836e-06, + "loss": 0.1089630126953125, + "step": 12463 + }, + { + "epoch": 0.8425037177234014, + "grad_norm": 0.32572898268699646, + "learning_rate": 1.928997004516038e-06, + "loss": 0.040882110595703125, + "step": 12464 + }, + { + "epoch": 0.8425713126943355, + "grad_norm": 0.5327647924423218, + "learning_rate": 1.927378660939197e-06, + "loss": 0.07819366455078125, + "step": 12465 + }, + { + "epoch": 0.8426389076652697, + "grad_norm": 1.8340235948562622, + "learning_rate": 1.925760949897082e-06, + "loss": 0.1976165771484375, + "step": 12466 + }, + { + "epoch": 0.8427065026362038, + "grad_norm": 0.7637589573860168, + "learning_rate": 1.924143871467954e-06, + "loss": 0.12247085571289062, + "step": 12467 + }, + { + "epoch": 0.8427740976071381, + "grad_norm": 0.213173970580101, + "learning_rate": 1.922527425730065e-06, + "loss": 0.03765106201171875, + "step": 12468 + }, + { + "epoch": 0.8428416925780722, + "grad_norm": 0.32363709807395935, + "learning_rate": 1.9209116127616265e-06, + "loss": 0.064849853515625, + "step": 12469 + }, + { + "epoch": 0.8429092875490064, + "grad_norm": 0.4215156137943268, + "learning_rate": 1.9192964326408242e-06, + "loss": 0.075103759765625, + "step": 12470 + }, + { + "epoch": 0.8429768825199405, + "grad_norm": 0.35403645038604736, + "learning_rate": 1.9176818854458084e-06, + "loss": 0.0654449462890625, + "step": 12471 + }, + { + "epoch": 0.8430444774908746, + "grad_norm": 1.4224694967269897, + "learning_rate": 1.9160679712547004e-06, + "loss": 0.190826416015625, + "step": 12472 + }, + { + "epoch": 0.8431120724618089, + "grad_norm": 0.5952008962631226, + "learning_rate": 1.914454690145591e-06, + "loss": 0.1048126220703125, + "step": 12473 + }, + { + "epoch": 0.843179667432743, + "grad_norm": 0.9678772687911987, + "learning_rate": 1.912842042196542e-06, + "loss": 0.17401885986328125, + "step": 12474 + }, + { + "epoch": 0.8432472624036772, + "grad_norm": 0.3215445280075073, + "learning_rate": 1.9112300274855856e-06, + "loss": 0.04546356201171875, + "step": 12475 + }, + { + "epoch": 0.8433148573746113, + "grad_norm": 0.31407785415649414, + "learning_rate": 1.9096186460907134e-06, + "loss": 0.0579376220703125, + "step": 12476 + }, + { + "epoch": 0.8433824523455455, + "grad_norm": 0.44812092185020447, + "learning_rate": 1.908007898089905e-06, + "loss": 0.059772491455078125, + "step": 12477 + }, + { + "epoch": 0.8434500473164797, + "grad_norm": 0.9199396371841431, + "learning_rate": 1.9063977835610886e-06, + "loss": 0.1515350341796875, + "step": 12478 + }, + { + "epoch": 0.8435176422874138, + "grad_norm": 0.4415431618690491, + "learning_rate": 1.9047883025821777e-06, + "loss": 0.07209014892578125, + "step": 12479 + }, + { + "epoch": 0.843585237258348, + "grad_norm": 1.1135350465774536, + "learning_rate": 1.9031794552310456e-06, + "loss": 0.1113128662109375, + "step": 12480 + }, + { + "epoch": 0.8436528322292821, + "grad_norm": 0.3141515254974365, + "learning_rate": 1.9015712415855413e-06, + "loss": 0.05504608154296875, + "step": 12481 + }, + { + "epoch": 0.8437204272002163, + "grad_norm": 0.40052613615989685, + "learning_rate": 1.8999636617234828e-06, + "loss": 0.0878143310546875, + "step": 12482 + }, + { + "epoch": 0.8437880221711505, + "grad_norm": 0.3582274317741394, + "learning_rate": 1.8983567157226429e-06, + "loss": 0.05859375, + "step": 12483 + }, + { + "epoch": 0.8438556171420847, + "grad_norm": 0.33880719542503357, + "learning_rate": 1.8967504036607858e-06, + "loss": 0.05135345458984375, + "step": 12484 + }, + { + "epoch": 0.8439232121130188, + "grad_norm": 0.26302868127822876, + "learning_rate": 1.8951447256156329e-06, + "loss": 0.05310821533203125, + "step": 12485 + }, + { + "epoch": 0.8439908070839529, + "grad_norm": 0.8210626244544983, + "learning_rate": 1.893539681664877e-06, + "loss": 0.165283203125, + "step": 12486 + }, + { + "epoch": 0.8440584020548871, + "grad_norm": 0.6666537523269653, + "learning_rate": 1.8919352718861794e-06, + "loss": 0.0829620361328125, + "step": 12487 + }, + { + "epoch": 0.8441259970258213, + "grad_norm": 0.6851615309715271, + "learning_rate": 1.890331496357171e-06, + "loss": 0.099151611328125, + "step": 12488 + }, + { + "epoch": 0.8441935919967555, + "grad_norm": 0.4089493453502655, + "learning_rate": 1.88872835515545e-06, + "loss": 0.06952667236328125, + "step": 12489 + }, + { + "epoch": 0.8442611869676896, + "grad_norm": 0.9908756613731384, + "learning_rate": 1.8871258483585963e-06, + "loss": 0.10316848754882812, + "step": 12490 + }, + { + "epoch": 0.8443287819386238, + "grad_norm": 0.33389997482299805, + "learning_rate": 1.8855239760441356e-06, + "loss": 0.056854248046875, + "step": 12491 + }, + { + "epoch": 0.8443963769095579, + "grad_norm": 1.1844950914382935, + "learning_rate": 1.8839227382895847e-06, + "loss": 0.1614837646484375, + "step": 12492 + }, + { + "epoch": 0.844463971880492, + "grad_norm": 1.693536400794983, + "learning_rate": 1.8823221351724247e-06, + "loss": 0.204437255859375, + "step": 12493 + }, + { + "epoch": 0.8445315668514263, + "grad_norm": 0.8400222659111023, + "learning_rate": 1.8807221667700902e-06, + "loss": 0.08752822875976562, + "step": 12494 + }, + { + "epoch": 0.8445991618223604, + "grad_norm": 1.0160349607467651, + "learning_rate": 1.8791228331600097e-06, + "loss": 0.17547607421875, + "step": 12495 + }, + { + "epoch": 0.8446667567932946, + "grad_norm": 1.1580201387405396, + "learning_rate": 1.8775241344195626e-06, + "loss": 0.19189453125, + "step": 12496 + }, + { + "epoch": 0.8447343517642287, + "grad_norm": 0.3343391418457031, + "learning_rate": 1.8759260706261073e-06, + "loss": 0.04705810546875, + "step": 12497 + }, + { + "epoch": 0.844801946735163, + "grad_norm": 0.5945776104927063, + "learning_rate": 1.8743286418569649e-06, + "loss": 0.0838775634765625, + "step": 12498 + }, + { + "epoch": 0.8448695417060971, + "grad_norm": 0.4711599349975586, + "learning_rate": 1.8727318481894323e-06, + "loss": 0.1010589599609375, + "step": 12499 + }, + { + "epoch": 0.8449371366770312, + "grad_norm": 0.29571831226348877, + "learning_rate": 1.8711356897007691e-06, + "loss": 0.0530853271484375, + "step": 12500 + }, + { + "epoch": 0.8450047316479654, + "grad_norm": 0.4257277250289917, + "learning_rate": 1.8695401664682088e-06, + "loss": 0.06735992431640625, + "step": 12501 + }, + { + "epoch": 0.8450723266188995, + "grad_norm": 0.3227176368236542, + "learning_rate": 1.867945278568951e-06, + "loss": 0.048126220703125, + "step": 12502 + }, + { + "epoch": 0.8451399215898338, + "grad_norm": 1.0721027851104736, + "learning_rate": 1.8663510260801726e-06, + "loss": 0.13831329345703125, + "step": 12503 + }, + { + "epoch": 0.8452075165607679, + "grad_norm": 0.9627622961997986, + "learning_rate": 1.864757409079003e-06, + "loss": 0.1246185302734375, + "step": 12504 + }, + { + "epoch": 0.8452751115317021, + "grad_norm": 0.2288053035736084, + "learning_rate": 1.8631644276425613e-06, + "loss": 0.039676666259765625, + "step": 12505 + }, + { + "epoch": 0.8453427065026362, + "grad_norm": 0.45386359095573425, + "learning_rate": 1.8615720818479238e-06, + "loss": 0.0600433349609375, + "step": 12506 + }, + { + "epoch": 0.8454103014735703, + "grad_norm": 0.26964226365089417, + "learning_rate": 1.8599803717721303e-06, + "loss": 0.042636871337890625, + "step": 12507 + }, + { + "epoch": 0.8454778964445046, + "grad_norm": 0.30876827239990234, + "learning_rate": 1.8583892974922063e-06, + "loss": 0.045654296875, + "step": 12508 + }, + { + "epoch": 0.8455454914154387, + "grad_norm": 0.5064713358879089, + "learning_rate": 1.8567988590851348e-06, + "loss": 0.094818115234375, + "step": 12509 + }, + { + "epoch": 0.8456130863863729, + "grad_norm": 0.6768487691879272, + "learning_rate": 1.8552090566278713e-06, + "loss": 0.11798095703125, + "step": 12510 + }, + { + "epoch": 0.845680681357307, + "grad_norm": 0.8758042454719543, + "learning_rate": 1.8536198901973388e-06, + "loss": 0.15586090087890625, + "step": 12511 + }, + { + "epoch": 0.8457482763282412, + "grad_norm": 0.27498960494995117, + "learning_rate": 1.8520313598704347e-06, + "loss": 0.04625701904296875, + "step": 12512 + }, + { + "epoch": 0.8458158712991753, + "grad_norm": 0.45466333627700806, + "learning_rate": 1.850443465724019e-06, + "loss": 0.07635498046875, + "step": 12513 + }, + { + "epoch": 0.8458834662701095, + "grad_norm": 0.5123671293258667, + "learning_rate": 1.848856207834923e-06, + "loss": 0.09600448608398438, + "step": 12514 + }, + { + "epoch": 0.8459510612410437, + "grad_norm": 0.7711812257766724, + "learning_rate": 1.8472695862799493e-06, + "loss": 0.12236785888671875, + "step": 12515 + }, + { + "epoch": 0.8460186562119778, + "grad_norm": 0.584709644317627, + "learning_rate": 1.845683601135873e-06, + "loss": 0.08465576171875, + "step": 12516 + }, + { + "epoch": 0.846086251182912, + "grad_norm": 0.23248521983623505, + "learning_rate": 1.8440982524794275e-06, + "loss": 0.024275779724121094, + "step": 12517 + }, + { + "epoch": 0.8461538461538461, + "grad_norm": 0.3668924868106842, + "learning_rate": 1.8425135403873216e-06, + "loss": 0.0670013427734375, + "step": 12518 + }, + { + "epoch": 0.8462214411247804, + "grad_norm": 1.555023431777954, + "learning_rate": 1.8409294649362424e-06, + "loss": 0.1624603271484375, + "step": 12519 + }, + { + "epoch": 0.8462890360957145, + "grad_norm": 0.49293243885040283, + "learning_rate": 1.839346026202825e-06, + "loss": 0.08050918579101562, + "step": 12520 + }, + { + "epoch": 0.8463566310666486, + "grad_norm": 0.37918636202812195, + "learning_rate": 1.8377632242636932e-06, + "loss": 0.06660842895507812, + "step": 12521 + }, + { + "epoch": 0.8464242260375828, + "grad_norm": 0.6346349716186523, + "learning_rate": 1.8361810591954343e-06, + "loss": 0.14697265625, + "step": 12522 + }, + { + "epoch": 0.8464918210085169, + "grad_norm": 0.815216600894928, + "learning_rate": 1.8345995310746e-06, + "loss": 0.1256561279296875, + "step": 12523 + }, + { + "epoch": 0.8465594159794512, + "grad_norm": 0.44341519474983215, + "learning_rate": 1.8330186399777143e-06, + "loss": 0.09677886962890625, + "step": 12524 + }, + { + "epoch": 0.8466270109503853, + "grad_norm": 0.8340465426445007, + "learning_rate": 1.8314383859812711e-06, + "loss": 0.0945892333984375, + "step": 12525 + }, + { + "epoch": 0.8466946059213194, + "grad_norm": 0.28307175636291504, + "learning_rate": 1.8298587691617309e-06, + "loss": 0.023611068725585938, + "step": 12526 + }, + { + "epoch": 0.8467622008922536, + "grad_norm": 0.33816444873809814, + "learning_rate": 1.8282797895955338e-06, + "loss": 0.0464019775390625, + "step": 12527 + }, + { + "epoch": 0.8468297958631877, + "grad_norm": 1.3870857954025269, + "learning_rate": 1.826701447359071e-06, + "loss": 0.193572998046875, + "step": 12528 + }, + { + "epoch": 0.846897390834122, + "grad_norm": 0.2556277811527252, + "learning_rate": 1.8251237425287176e-06, + "loss": 0.030961990356445312, + "step": 12529 + }, + { + "epoch": 0.8469649858050561, + "grad_norm": 0.4431969225406647, + "learning_rate": 1.8235466751808093e-06, + "loss": 0.062744140625, + "step": 12530 + }, + { + "epoch": 0.8470325807759903, + "grad_norm": 0.956301748752594, + "learning_rate": 1.8219702453916537e-06, + "loss": 0.10514068603515625, + "step": 12531 + }, + { + "epoch": 0.8471001757469244, + "grad_norm": 0.4840073585510254, + "learning_rate": 1.8203944532375378e-06, + "loss": 0.0959930419921875, + "step": 12532 + }, + { + "epoch": 0.8471677707178585, + "grad_norm": 0.8293899297714233, + "learning_rate": 1.818819298794694e-06, + "loss": 0.11096954345703125, + "step": 12533 + }, + { + "epoch": 0.8472353656887928, + "grad_norm": 0.6183205246925354, + "learning_rate": 1.8172447821393496e-06, + "loss": 0.112396240234375, + "step": 12534 + }, + { + "epoch": 0.8473029606597269, + "grad_norm": 0.3783004879951477, + "learning_rate": 1.8156709033476855e-06, + "loss": 0.045093536376953125, + "step": 12535 + }, + { + "epoch": 0.8473705556306611, + "grad_norm": 0.2559519112110138, + "learning_rate": 1.8140976624958538e-06, + "loss": 0.036102294921875, + "step": 12536 + }, + { + "epoch": 0.8474381506015952, + "grad_norm": 0.2287634015083313, + "learning_rate": 1.8125250596599807e-06, + "loss": 0.0429840087890625, + "step": 12537 + }, + { + "epoch": 0.8475057455725294, + "grad_norm": 0.30450862646102905, + "learning_rate": 1.8109530949161585e-06, + "loss": 0.065460205078125, + "step": 12538 + }, + { + "epoch": 0.8475733405434636, + "grad_norm": 0.6466052532196045, + "learning_rate": 1.8093817683404428e-06, + "loss": 0.08605194091796875, + "step": 12539 + }, + { + "epoch": 0.8476409355143977, + "grad_norm": 0.9684356451034546, + "learning_rate": 1.8078110800088748e-06, + "loss": 0.11408233642578125, + "step": 12540 + }, + { + "epoch": 0.8477085304853319, + "grad_norm": 0.4331192076206207, + "learning_rate": 1.8062410299974453e-06, + "loss": 0.0886688232421875, + "step": 12541 + }, + { + "epoch": 0.847776125456266, + "grad_norm": 0.6452849507331848, + "learning_rate": 1.8046716183821233e-06, + "loss": 0.098297119140625, + "step": 12542 + }, + { + "epoch": 0.8478437204272002, + "grad_norm": 0.5517042279243469, + "learning_rate": 1.8031028452388548e-06, + "loss": 0.1209716796875, + "step": 12543 + }, + { + "epoch": 0.8479113153981344, + "grad_norm": 0.3364531993865967, + "learning_rate": 1.8015347106435343e-06, + "loss": 0.03782844543457031, + "step": 12544 + }, + { + "epoch": 0.8479789103690686, + "grad_norm": 0.6780735850334167, + "learning_rate": 1.7999672146720525e-06, + "loss": 0.1137847900390625, + "step": 12545 + }, + { + "epoch": 0.8480465053400027, + "grad_norm": 0.4832431674003601, + "learning_rate": 1.7984003574002406e-06, + "loss": 0.056865692138671875, + "step": 12546 + }, + { + "epoch": 0.8481141003109368, + "grad_norm": 0.738152265548706, + "learning_rate": 1.7968341389039212e-06, + "loss": 0.101715087890625, + "step": 12547 + }, + { + "epoch": 0.848181695281871, + "grad_norm": 1.517419695854187, + "learning_rate": 1.795268559258877e-06, + "loss": 0.1424560546875, + "step": 12548 + }, + { + "epoch": 0.8482492902528052, + "grad_norm": 0.6358879208564758, + "learning_rate": 1.793703618540859e-06, + "loss": 0.090911865234375, + "step": 12549 + }, + { + "epoch": 0.8483168852237394, + "grad_norm": 0.9845544099807739, + "learning_rate": 1.7921393168255885e-06, + "loss": 0.12966537475585938, + "step": 12550 + }, + { + "epoch": 0.8483844801946735, + "grad_norm": 1.205496072769165, + "learning_rate": 1.7905756541887563e-06, + "loss": 0.16545867919921875, + "step": 12551 + }, + { + "epoch": 0.8484520751656077, + "grad_norm": 0.6941547989845276, + "learning_rate": 1.789012630706024e-06, + "loss": 0.10799407958984375, + "step": 12552 + }, + { + "epoch": 0.8485196701365418, + "grad_norm": 1.142345666885376, + "learning_rate": 1.7874502464530173e-06, + "loss": 0.12042999267578125, + "step": 12553 + }, + { + "epoch": 0.848587265107476, + "grad_norm": 0.26535564661026, + "learning_rate": 1.785888501505336e-06, + "loss": 0.03443431854248047, + "step": 12554 + }, + { + "epoch": 0.8486548600784102, + "grad_norm": 0.4988434612751007, + "learning_rate": 1.7843273959385449e-06, + "loss": 0.08725738525390625, + "step": 12555 + }, + { + "epoch": 0.8487224550493443, + "grad_norm": 0.797081708908081, + "learning_rate": 1.7827669298281861e-06, + "loss": 0.109893798828125, + "step": 12556 + }, + { + "epoch": 0.8487900500202785, + "grad_norm": 0.23906852304935455, + "learning_rate": 1.781207103249755e-06, + "loss": 0.052539825439453125, + "step": 12557 + }, + { + "epoch": 0.8488576449912126, + "grad_norm": 0.5029200315475464, + "learning_rate": 1.7796479162787376e-06, + "loss": 0.09163665771484375, + "step": 12558 + }, + { + "epoch": 0.8489252399621469, + "grad_norm": 1.0717971324920654, + "learning_rate": 1.7780893689905637e-06, + "loss": 0.13089752197265625, + "step": 12559 + }, + { + "epoch": 0.848992834933081, + "grad_norm": 1.055679202079773, + "learning_rate": 1.7765314614606558e-06, + "loss": 0.129180908203125, + "step": 12560 + }, + { + "epoch": 0.8490604299040151, + "grad_norm": 0.16923189163208008, + "learning_rate": 1.7749741937643926e-06, + "loss": 0.021493911743164062, + "step": 12561 + }, + { + "epoch": 0.8491280248749493, + "grad_norm": 0.6326895952224731, + "learning_rate": 1.7734175659771219e-06, + "loss": 0.10507965087890625, + "step": 12562 + }, + { + "epoch": 0.8491956198458834, + "grad_norm": 0.7651485800743103, + "learning_rate": 1.771861578174165e-06, + "loss": 0.1259002685546875, + "step": 12563 + }, + { + "epoch": 0.8492632148168177, + "grad_norm": 0.6628239750862122, + "learning_rate": 1.7703062304308098e-06, + "loss": 0.07632064819335938, + "step": 12564 + }, + { + "epoch": 0.8493308097877518, + "grad_norm": 1.102787971496582, + "learning_rate": 1.7687515228223134e-06, + "loss": 0.1206512451171875, + "step": 12565 + }, + { + "epoch": 0.849398404758686, + "grad_norm": 0.4967746436595917, + "learning_rate": 1.7671974554239033e-06, + "loss": 0.1170501708984375, + "step": 12566 + }, + { + "epoch": 0.8494659997296201, + "grad_norm": 0.38104522228240967, + "learning_rate": 1.765644028310775e-06, + "loss": 0.07472610473632812, + "step": 12567 + }, + { + "epoch": 0.8495335947005542, + "grad_norm": 0.7543094754219055, + "learning_rate": 1.7640912415580878e-06, + "loss": 0.1346435546875, + "step": 12568 + }, + { + "epoch": 0.8496011896714885, + "grad_norm": 0.3719490170478821, + "learning_rate": 1.7625390952409854e-06, + "loss": 0.050640106201171875, + "step": 12569 + }, + { + "epoch": 0.8496687846424226, + "grad_norm": 0.7624958753585815, + "learning_rate": 1.760987589434559e-06, + "loss": 0.12853240966796875, + "step": 12570 + }, + { + "epoch": 0.8497363796133568, + "grad_norm": 0.3757568895816803, + "learning_rate": 1.7594367242138886e-06, + "loss": 0.048065185546875, + "step": 12571 + }, + { + "epoch": 0.8498039745842909, + "grad_norm": 0.6826402544975281, + "learning_rate": 1.7578864996540128e-06, + "loss": 0.14604949951171875, + "step": 12572 + }, + { + "epoch": 0.8498715695552251, + "grad_norm": 0.3648700714111328, + "learning_rate": 1.7563369158299381e-06, + "loss": 0.055484771728515625, + "step": 12573 + }, + { + "epoch": 0.8499391645261593, + "grad_norm": 0.7961686849594116, + "learning_rate": 1.754787972816646e-06, + "loss": 0.147674560546875, + "step": 12574 + }, + { + "epoch": 0.8500067594970934, + "grad_norm": 1.5110176801681519, + "learning_rate": 1.753239670689082e-06, + "loss": 0.150848388671875, + "step": 12575 + }, + { + "epoch": 0.8500743544680276, + "grad_norm": 0.6546061038970947, + "learning_rate": 1.7516920095221639e-06, + "loss": 0.13043975830078125, + "step": 12576 + }, + { + "epoch": 0.8501419494389617, + "grad_norm": 0.4942402243614197, + "learning_rate": 1.7501449893907772e-06, + "loss": 0.08648681640625, + "step": 12577 + }, + { + "epoch": 0.8502095444098959, + "grad_norm": 0.5621593594551086, + "learning_rate": 1.7485986103697753e-06, + "loss": 0.09273910522460938, + "step": 12578 + }, + { + "epoch": 0.85027713938083, + "grad_norm": 0.6531427502632141, + "learning_rate": 1.7470528725339818e-06, + "loss": 0.092315673828125, + "step": 12579 + }, + { + "epoch": 0.8503447343517643, + "grad_norm": 1.8977047204971313, + "learning_rate": 1.74550777595819e-06, + "loss": 0.192657470703125, + "step": 12580 + }, + { + "epoch": 0.8504123293226984, + "grad_norm": 1.2641814947128296, + "learning_rate": 1.7439633207171585e-06, + "loss": 0.1629638671875, + "step": 12581 + }, + { + "epoch": 0.8504799242936325, + "grad_norm": 1.2744778394699097, + "learning_rate": 1.742419506885624e-06, + "loss": 0.218505859375, + "step": 12582 + }, + { + "epoch": 0.8505475192645667, + "grad_norm": 0.7292867302894592, + "learning_rate": 1.7408763345382772e-06, + "loss": 0.1215362548828125, + "step": 12583 + }, + { + "epoch": 0.8506151142355008, + "grad_norm": 0.9950034022331238, + "learning_rate": 1.7393338037497948e-06, + "loss": 0.13632965087890625, + "step": 12584 + }, + { + "epoch": 0.8506827092064351, + "grad_norm": 0.9370466470718384, + "learning_rate": 1.7377919145948086e-06, + "loss": 0.08943557739257812, + "step": 12585 + }, + { + "epoch": 0.8507503041773692, + "grad_norm": 1.8102824687957764, + "learning_rate": 1.7362506671479272e-06, + "loss": 0.228973388671875, + "step": 12586 + }, + { + "epoch": 0.8508178991483034, + "grad_norm": 0.5441098213195801, + "learning_rate": 1.7347100614837247e-06, + "loss": 0.09587478637695312, + "step": 12587 + }, + { + "epoch": 0.8508854941192375, + "grad_norm": 0.6517146229743958, + "learning_rate": 1.7331700976767444e-06, + "loss": 0.0699920654296875, + "step": 12588 + }, + { + "epoch": 0.8509530890901716, + "grad_norm": 2.7048137187957764, + "learning_rate": 1.7316307758015022e-06, + "loss": 0.13787841796875, + "step": 12589 + }, + { + "epoch": 0.8510206840611059, + "grad_norm": 1.6008471250534058, + "learning_rate": 1.730092095932476e-06, + "loss": 0.216522216796875, + "step": 12590 + }, + { + "epoch": 0.85108827903204, + "grad_norm": 0.3398291766643524, + "learning_rate": 1.7285540581441206e-06, + "loss": 0.06672286987304688, + "step": 12591 + }, + { + "epoch": 0.8511558740029742, + "grad_norm": 0.19705940783023834, + "learning_rate": 1.727016662510854e-06, + "loss": 0.03472137451171875, + "step": 12592 + }, + { + "epoch": 0.8512234689739083, + "grad_norm": 0.8102408647537231, + "learning_rate": 1.7254799091070638e-06, + "loss": 0.12311553955078125, + "step": 12593 + }, + { + "epoch": 0.8512910639448426, + "grad_norm": 0.8836013674736023, + "learning_rate": 1.723943798007107e-06, + "loss": 0.1184539794921875, + "step": 12594 + }, + { + "epoch": 0.8513586589157767, + "grad_norm": 0.42676183581352234, + "learning_rate": 1.7224083292853176e-06, + "loss": 0.06538867950439453, + "step": 12595 + }, + { + "epoch": 0.8514262538867108, + "grad_norm": 0.859697163105011, + "learning_rate": 1.7208735030159812e-06, + "loss": 0.1679534912109375, + "step": 12596 + }, + { + "epoch": 0.851493848857645, + "grad_norm": 1.045547366142273, + "learning_rate": 1.719339319273368e-06, + "loss": 0.1150360107421875, + "step": 12597 + }, + { + "epoch": 0.8515614438285791, + "grad_norm": 0.7088919281959534, + "learning_rate": 1.7178057781317141e-06, + "loss": 0.12374114990234375, + "step": 12598 + }, + { + "epoch": 0.8516290387995133, + "grad_norm": 0.3292332589626312, + "learning_rate": 1.7162728796652099e-06, + "loss": 0.05291748046875, + "step": 12599 + }, + { + "epoch": 0.8516966337704475, + "grad_norm": 0.26181402802467346, + "learning_rate": 1.7147406239480395e-06, + "loss": 0.036029815673828125, + "step": 12600 + }, + { + "epoch": 0.8517642287413817, + "grad_norm": 0.5245386958122253, + "learning_rate": 1.7132090110543369e-06, + "loss": 0.1020050048828125, + "step": 12601 + }, + { + "epoch": 0.8518318237123158, + "grad_norm": 0.45663562417030334, + "learning_rate": 1.711678041058213e-06, + "loss": 0.0634613037109375, + "step": 12602 + }, + { + "epoch": 0.8518994186832499, + "grad_norm": 0.5680230259895325, + "learning_rate": 1.7101477140337428e-06, + "loss": 0.1196746826171875, + "step": 12603 + }, + { + "epoch": 0.8519670136541841, + "grad_norm": 0.6023957133293152, + "learning_rate": 1.7086180300549765e-06, + "loss": 0.09349822998046875, + "step": 12604 + }, + { + "epoch": 0.8520346086251183, + "grad_norm": 0.9484821557998657, + "learning_rate": 1.7070889891959273e-06, + "loss": 0.199737548828125, + "step": 12605 + }, + { + "epoch": 0.8521022035960525, + "grad_norm": 0.28638219833374023, + "learning_rate": 1.7055605915305816e-06, + "loss": 0.04114532470703125, + "step": 12606 + }, + { + "epoch": 0.8521697985669866, + "grad_norm": 1.1961578130722046, + "learning_rate": 1.7040328371328867e-06, + "loss": 0.165435791015625, + "step": 12607 + }, + { + "epoch": 0.8522373935379208, + "grad_norm": 0.9313391447067261, + "learning_rate": 1.7025057260767785e-06, + "loss": 0.10700416564941406, + "step": 12608 + }, + { + "epoch": 0.8523049885088549, + "grad_norm": 0.9502792954444885, + "learning_rate": 1.7009792584361312e-06, + "loss": 0.1598663330078125, + "step": 12609 + }, + { + "epoch": 0.8523725834797891, + "grad_norm": 0.7804898023605347, + "learning_rate": 1.6994534342848172e-06, + "loss": 0.1573028564453125, + "step": 12610 + }, + { + "epoch": 0.8524401784507233, + "grad_norm": 0.7662633657455444, + "learning_rate": 1.6979282536966645e-06, + "loss": 0.1398162841796875, + "step": 12611 + }, + { + "epoch": 0.8525077734216574, + "grad_norm": 0.7604819536209106, + "learning_rate": 1.6964037167454605e-06, + "loss": 0.10236358642578125, + "step": 12612 + }, + { + "epoch": 0.8525753683925916, + "grad_norm": 0.7198020815849304, + "learning_rate": 1.6948798235049829e-06, + "loss": 0.177398681640625, + "step": 12613 + }, + { + "epoch": 0.8526429633635257, + "grad_norm": 0.6220574378967285, + "learning_rate": 1.6933565740489625e-06, + "loss": 0.07832717895507812, + "step": 12614 + }, + { + "epoch": 0.85271055833446, + "grad_norm": 0.3779604136943817, + "learning_rate": 1.6918339684511041e-06, + "loss": 0.0543975830078125, + "step": 12615 + }, + { + "epoch": 0.8527781533053941, + "grad_norm": 1.0375206470489502, + "learning_rate": 1.6903120067850819e-06, + "loss": 0.1191558837890625, + "step": 12616 + }, + { + "epoch": 0.8528457482763282, + "grad_norm": 0.4722400903701782, + "learning_rate": 1.6887906891245353e-06, + "loss": 0.0823211669921875, + "step": 12617 + }, + { + "epoch": 0.8529133432472624, + "grad_norm": 1.1535059213638306, + "learning_rate": 1.6872700155430753e-06, + "loss": 0.219451904296875, + "step": 12618 + }, + { + "epoch": 0.8529809382181965, + "grad_norm": 0.29026705026626587, + "learning_rate": 1.6857499861142883e-06, + "loss": 0.04639434814453125, + "step": 12619 + }, + { + "epoch": 0.8530485331891308, + "grad_norm": 0.6081207990646362, + "learning_rate": 1.6842306009117104e-06, + "loss": 0.113037109375, + "step": 12620 + }, + { + "epoch": 0.8531161281600649, + "grad_norm": 1.6284620761871338, + "learning_rate": 1.6827118600088725e-06, + "loss": 0.271697998046875, + "step": 12621 + }, + { + "epoch": 0.8531837231309991, + "grad_norm": 0.40403854846954346, + "learning_rate": 1.6811937634792513e-06, + "loss": 0.06711578369140625, + "step": 12622 + }, + { + "epoch": 0.8532513181019332, + "grad_norm": 0.9764885902404785, + "learning_rate": 1.6796763113963027e-06, + "loss": 0.170013427734375, + "step": 12623 + }, + { + "epoch": 0.8533189130728673, + "grad_norm": 1.3399388790130615, + "learning_rate": 1.6781595038334563e-06, + "loss": 0.208465576171875, + "step": 12624 + }, + { + "epoch": 0.8533865080438016, + "grad_norm": 0.7227217555046082, + "learning_rate": 1.676643340864097e-06, + "loss": 0.12070465087890625, + "step": 12625 + }, + { + "epoch": 0.8534541030147357, + "grad_norm": 0.46398481726646423, + "learning_rate": 1.6751278225615907e-06, + "loss": 0.06238555908203125, + "step": 12626 + }, + { + "epoch": 0.8535216979856699, + "grad_norm": 1.0688759088516235, + "learning_rate": 1.6736129489992673e-06, + "loss": 0.17327880859375, + "step": 12627 + }, + { + "epoch": 0.853589292956604, + "grad_norm": 0.8706232309341431, + "learning_rate": 1.6720987202504268e-06, + "loss": 0.1658935546875, + "step": 12628 + }, + { + "epoch": 0.8536568879275382, + "grad_norm": 0.267923504114151, + "learning_rate": 1.6705851363883352e-06, + "loss": 0.03217029571533203, + "step": 12629 + }, + { + "epoch": 0.8537244828984724, + "grad_norm": 1.1430624723434448, + "learning_rate": 1.6690721974862288e-06, + "loss": 0.2015838623046875, + "step": 12630 + }, + { + "epoch": 0.8537920778694065, + "grad_norm": 1.1735764741897583, + "learning_rate": 1.6675599036173094e-06, + "loss": 0.187774658203125, + "step": 12631 + }, + { + "epoch": 0.8538596728403407, + "grad_norm": 0.5622799396514893, + "learning_rate": 1.6660482548547633e-06, + "loss": 0.126739501953125, + "step": 12632 + }, + { + "epoch": 0.8539272678112748, + "grad_norm": 0.270153671503067, + "learning_rate": 1.6645372512717223e-06, + "loss": 0.0529937744140625, + "step": 12633 + }, + { + "epoch": 0.853994862782209, + "grad_norm": 0.264113187789917, + "learning_rate": 1.663026892941299e-06, + "loss": 0.034572601318359375, + "step": 12634 + }, + { + "epoch": 0.8540624577531432, + "grad_norm": 0.5978004932403564, + "learning_rate": 1.6615171799365803e-06, + "loss": 0.123077392578125, + "step": 12635 + }, + { + "epoch": 0.8541300527240774, + "grad_norm": 0.40505215525627136, + "learning_rate": 1.6600081123306077e-06, + "loss": 0.0698089599609375, + "step": 12636 + }, + { + "epoch": 0.8541976476950115, + "grad_norm": 0.9799116253852844, + "learning_rate": 1.6584996901964077e-06, + "loss": 0.191436767578125, + "step": 12637 + }, + { + "epoch": 0.8542652426659456, + "grad_norm": 0.8853522539138794, + "learning_rate": 1.6569919136069573e-06, + "loss": 0.11367416381835938, + "step": 12638 + }, + { + "epoch": 0.8543328376368798, + "grad_norm": 0.6213499307632446, + "learning_rate": 1.655484782635221e-06, + "loss": 0.10298919677734375, + "step": 12639 + }, + { + "epoch": 0.854400432607814, + "grad_norm": 1.2945480346679688, + "learning_rate": 1.6539782973541173e-06, + "loss": 0.1769866943359375, + "step": 12640 + }, + { + "epoch": 0.8544680275787482, + "grad_norm": 0.7432214617729187, + "learning_rate": 1.652472457836543e-06, + "loss": 0.1288909912109375, + "step": 12641 + }, + { + "epoch": 0.8545356225496823, + "grad_norm": 0.6345567107200623, + "learning_rate": 1.6509672641553596e-06, + "loss": 0.10105133056640625, + "step": 12642 + }, + { + "epoch": 0.8546032175206165, + "grad_norm": 1.1299126148223877, + "learning_rate": 1.6494627163833958e-06, + "loss": 0.13018035888671875, + "step": 12643 + }, + { + "epoch": 0.8546708124915506, + "grad_norm": 0.669694721698761, + "learning_rate": 1.6479588145934483e-06, + "loss": 0.10010147094726562, + "step": 12644 + }, + { + "epoch": 0.8547384074624848, + "grad_norm": 1.1559984683990479, + "learning_rate": 1.6464555588582953e-06, + "loss": 0.173828125, + "step": 12645 + }, + { + "epoch": 0.854806002433419, + "grad_norm": 0.5310428142547607, + "learning_rate": 1.6449529492506655e-06, + "loss": 0.0971221923828125, + "step": 12646 + }, + { + "epoch": 0.8548735974043531, + "grad_norm": 0.4568787217140198, + "learning_rate": 1.6434509858432607e-06, + "loss": 0.087677001953125, + "step": 12647 + }, + { + "epoch": 0.8549411923752873, + "grad_norm": 0.514815628528595, + "learning_rate": 1.6419496687087676e-06, + "loss": 0.09520721435546875, + "step": 12648 + }, + { + "epoch": 0.8550087873462214, + "grad_norm": 0.5021654963493347, + "learning_rate": 1.6404489979198168e-06, + "loss": 0.0937347412109375, + "step": 12649 + }, + { + "epoch": 0.8550763823171557, + "grad_norm": 1.5506826639175415, + "learning_rate": 1.6389489735490315e-06, + "loss": 0.20733642578125, + "step": 12650 + }, + { + "epoch": 0.8551439772880898, + "grad_norm": 0.5579481720924377, + "learning_rate": 1.6374495956689805e-06, + "loss": 0.09002685546875, + "step": 12651 + }, + { + "epoch": 0.8552115722590239, + "grad_norm": 0.4956158995628357, + "learning_rate": 1.6359508643522225e-06, + "loss": 0.096954345703125, + "step": 12652 + }, + { + "epoch": 0.8552791672299581, + "grad_norm": 0.4639390707015991, + "learning_rate": 1.634452779671271e-06, + "loss": 0.06597137451171875, + "step": 12653 + }, + { + "epoch": 0.8553467622008922, + "grad_norm": 1.1105878353118896, + "learning_rate": 1.6329553416986133e-06, + "loss": 0.19549560546875, + "step": 12654 + }, + { + "epoch": 0.8554143571718265, + "grad_norm": 0.4137652516365051, + "learning_rate": 1.6314585505067042e-06, + "loss": 0.07538795471191406, + "step": 12655 + }, + { + "epoch": 0.8554819521427606, + "grad_norm": 0.5967609286308289, + "learning_rate": 1.6299624061679697e-06, + "loss": 0.122711181640625, + "step": 12656 + }, + { + "epoch": 0.8555495471136947, + "grad_norm": 0.7423819303512573, + "learning_rate": 1.6284669087548016e-06, + "loss": 0.1583099365234375, + "step": 12657 + }, + { + "epoch": 0.8556171420846289, + "grad_norm": 0.4622874855995178, + "learning_rate": 1.6269720583395604e-06, + "loss": 0.081390380859375, + "step": 12658 + }, + { + "epoch": 0.855684737055563, + "grad_norm": 0.6714500784873962, + "learning_rate": 1.6254778549945764e-06, + "loss": 0.1180572509765625, + "step": 12659 + }, + { + "epoch": 0.8557523320264973, + "grad_norm": 0.5276075005531311, + "learning_rate": 1.6239842987921472e-06, + "loss": 0.06486892700195312, + "step": 12660 + }, + { + "epoch": 0.8558199269974314, + "grad_norm": 0.38777032494544983, + "learning_rate": 1.6224913898045478e-06, + "loss": 0.0735321044921875, + "step": 12661 + }, + { + "epoch": 0.8558875219683656, + "grad_norm": 0.49108585715293884, + "learning_rate": 1.6209991281040026e-06, + "loss": 0.0872802734375, + "step": 12662 + }, + { + "epoch": 0.8559551169392997, + "grad_norm": 0.7438933253288269, + "learning_rate": 1.6195075137627252e-06, + "loss": 0.14971923828125, + "step": 12663 + }, + { + "epoch": 0.8560227119102338, + "grad_norm": 1.0915887355804443, + "learning_rate": 1.618016546852888e-06, + "loss": 0.1895751953125, + "step": 12664 + }, + { + "epoch": 0.856090306881168, + "grad_norm": 0.3268563151359558, + "learning_rate": 1.6165262274466314e-06, + "loss": 0.05562877655029297, + "step": 12665 + }, + { + "epoch": 0.8561579018521022, + "grad_norm": 0.48714300990104675, + "learning_rate": 1.6150365556160663e-06, + "loss": 0.09787368774414062, + "step": 12666 + }, + { + "epoch": 0.8562254968230364, + "grad_norm": 1.2762808799743652, + "learning_rate": 1.613547531433273e-06, + "loss": 0.210601806640625, + "step": 12667 + }, + { + "epoch": 0.8562930917939705, + "grad_norm": 1.3974900245666504, + "learning_rate": 1.6120591549703013e-06, + "loss": 0.196380615234375, + "step": 12668 + }, + { + "epoch": 0.8563606867649047, + "grad_norm": 1.247275471687317, + "learning_rate": 1.6105714262991644e-06, + "loss": 0.17134857177734375, + "step": 12669 + }, + { + "epoch": 0.8564282817358388, + "grad_norm": 1.3295183181762695, + "learning_rate": 1.6090843454918503e-06, + "loss": 0.2179718017578125, + "step": 12670 + }, + { + "epoch": 0.856495876706773, + "grad_norm": 0.8566232919692993, + "learning_rate": 1.6075979126203128e-06, + "loss": 0.13401031494140625, + "step": 12671 + }, + { + "epoch": 0.8565634716777072, + "grad_norm": 0.7221418619155884, + "learning_rate": 1.6061121277564743e-06, + "loss": 0.164642333984375, + "step": 12672 + }, + { + "epoch": 0.8566310666486413, + "grad_norm": 0.944083571434021, + "learning_rate": 1.6046269909722243e-06, + "loss": 0.142822265625, + "step": 12673 + }, + { + "epoch": 0.8566986616195755, + "grad_norm": 1.0193501710891724, + "learning_rate": 1.603142502339432e-06, + "loss": 0.1538238525390625, + "step": 12674 + }, + { + "epoch": 0.8567662565905096, + "grad_norm": 0.6200941205024719, + "learning_rate": 1.601658661929913e-06, + "loss": 0.1374969482421875, + "step": 12675 + }, + { + "epoch": 0.8568338515614439, + "grad_norm": 0.9340165853500366, + "learning_rate": 1.600175469815473e-06, + "loss": 0.17193603515625, + "step": 12676 + }, + { + "epoch": 0.856901446532378, + "grad_norm": 1.9398136138916016, + "learning_rate": 1.5986929260678785e-06, + "loss": 0.1754150390625, + "step": 12677 + }, + { + "epoch": 0.8569690415033121, + "grad_norm": 0.38483378291130066, + "learning_rate": 1.5972110307588616e-06, + "loss": 0.099517822265625, + "step": 12678 + }, + { + "epoch": 0.8570366364742463, + "grad_norm": 1.4017120599746704, + "learning_rate": 1.5957297839601254e-06, + "loss": 0.1932373046875, + "step": 12679 + }, + { + "epoch": 0.8571042314451804, + "grad_norm": 0.544485867023468, + "learning_rate": 1.5942491857433423e-06, + "loss": 0.059284210205078125, + "step": 12680 + }, + { + "epoch": 0.8571718264161147, + "grad_norm": 0.5141434669494629, + "learning_rate": 1.5927692361801533e-06, + "loss": 0.0962066650390625, + "step": 12681 + }, + { + "epoch": 0.8572394213870488, + "grad_norm": 0.8747871518135071, + "learning_rate": 1.5912899353421678e-06, + "loss": 0.1066741943359375, + "step": 12682 + }, + { + "epoch": 0.857307016357983, + "grad_norm": 0.3193434476852417, + "learning_rate": 1.5898112833009637e-06, + "loss": 0.05413055419921875, + "step": 12683 + }, + { + "epoch": 0.8573746113289171, + "grad_norm": 0.7214104533195496, + "learning_rate": 1.5883332801280853e-06, + "loss": 0.101165771484375, + "step": 12684 + }, + { + "epoch": 0.8574422062998512, + "grad_norm": 0.22633853554725647, + "learning_rate": 1.5868559258950505e-06, + "loss": 0.03802490234375, + "step": 12685 + }, + { + "epoch": 0.8575098012707855, + "grad_norm": 0.2823435664176941, + "learning_rate": 1.5853792206733369e-06, + "loss": 0.0378875732421875, + "step": 12686 + }, + { + "epoch": 0.8575773962417196, + "grad_norm": 1.0784767866134644, + "learning_rate": 1.5839031645344092e-06, + "loss": 0.171630859375, + "step": 12687 + }, + { + "epoch": 0.8576449912126538, + "grad_norm": 1.1250869035720825, + "learning_rate": 1.5824277575496722e-06, + "loss": 0.1761932373046875, + "step": 12688 + }, + { + "epoch": 0.8577125861835879, + "grad_norm": 1.2510597705841064, + "learning_rate": 1.580952999790528e-06, + "loss": 0.189208984375, + "step": 12689 + }, + { + "epoch": 0.8577801811545221, + "grad_norm": 0.9296419620513916, + "learning_rate": 1.5794788913283287e-06, + "loss": 0.2119598388671875, + "step": 12690 + }, + { + "epoch": 0.8578477761254563, + "grad_norm": 0.8233460783958435, + "learning_rate": 1.5780054322344017e-06, + "loss": 0.19677734375, + "step": 12691 + }, + { + "epoch": 0.8579153710963904, + "grad_norm": 0.5952484607696533, + "learning_rate": 1.5765326225800435e-06, + "loss": 0.10596466064453125, + "step": 12692 + }, + { + "epoch": 0.8579829660673246, + "grad_norm": 0.6623729467391968, + "learning_rate": 1.5750604624365156e-06, + "loss": 0.1488037109375, + "step": 12693 + }, + { + "epoch": 0.8580505610382587, + "grad_norm": 0.7779528498649597, + "learning_rate": 1.5735889518750523e-06, + "loss": 0.16156005859375, + "step": 12694 + }, + { + "epoch": 0.858118156009193, + "grad_norm": 0.599277675151825, + "learning_rate": 1.572118090966852e-06, + "loss": 0.09567642211914062, + "step": 12695 + }, + { + "epoch": 0.8581857509801271, + "grad_norm": 0.6962384581565857, + "learning_rate": 1.5706478797830874e-06, + "loss": 0.154632568359375, + "step": 12696 + }, + { + "epoch": 0.8582533459510613, + "grad_norm": 0.3662048280239105, + "learning_rate": 1.5691783183948938e-06, + "loss": 0.05865478515625, + "step": 12697 + }, + { + "epoch": 0.8583209409219954, + "grad_norm": 0.28450441360473633, + "learning_rate": 1.567709406873379e-06, + "loss": 0.026430130004882812, + "step": 12698 + }, + { + "epoch": 0.8583885358929295, + "grad_norm": 0.43914201855659485, + "learning_rate": 1.5662411452896124e-06, + "loss": 0.07942962646484375, + "step": 12699 + }, + { + "epoch": 0.8584561308638637, + "grad_norm": 1.399889349937439, + "learning_rate": 1.5647735337146513e-06, + "loss": 0.224853515625, + "step": 12700 + }, + { + "epoch": 0.8585237258347979, + "grad_norm": 0.6428226828575134, + "learning_rate": 1.5633065722194934e-06, + "loss": 0.1083984375, + "step": 12701 + }, + { + "epoch": 0.8585913208057321, + "grad_norm": 0.2576909363269806, + "learning_rate": 1.561840260875127e-06, + "loss": 0.0385589599609375, + "step": 12702 + }, + { + "epoch": 0.8586589157766662, + "grad_norm": 0.834175169467926, + "learning_rate": 1.5603745997525036e-06, + "loss": 0.12592315673828125, + "step": 12703 + }, + { + "epoch": 0.8587265107476004, + "grad_norm": 0.4962061643600464, + "learning_rate": 1.5589095889225301e-06, + "loss": 0.09520721435546875, + "step": 12704 + }, + { + "epoch": 0.8587941057185345, + "grad_norm": 0.532749593257904, + "learning_rate": 1.5574452284561047e-06, + "loss": 0.080474853515625, + "step": 12705 + }, + { + "epoch": 0.8588617006894687, + "grad_norm": 0.5007956027984619, + "learning_rate": 1.5559815184240773e-06, + "loss": 0.09609222412109375, + "step": 12706 + }, + { + "epoch": 0.8589292956604029, + "grad_norm": 0.2633475661277771, + "learning_rate": 1.554518458897271e-06, + "loss": 0.03639984130859375, + "step": 12707 + }, + { + "epoch": 0.858996890631337, + "grad_norm": 0.20654705166816711, + "learning_rate": 1.553056049946478e-06, + "loss": 0.03018951416015625, + "step": 12708 + }, + { + "epoch": 0.8590644856022712, + "grad_norm": 0.7774065732955933, + "learning_rate": 1.551594291642458e-06, + "loss": 0.138702392578125, + "step": 12709 + }, + { + "epoch": 0.8591320805732053, + "grad_norm": 1.4213906526565552, + "learning_rate": 1.5501331840559396e-06, + "loss": 0.13694000244140625, + "step": 12710 + }, + { + "epoch": 0.8591996755441396, + "grad_norm": 0.682453453540802, + "learning_rate": 1.5486727272576278e-06, + "loss": 0.1414947509765625, + "step": 12711 + }, + { + "epoch": 0.8592672705150737, + "grad_norm": 0.35039857029914856, + "learning_rate": 1.5472129213181745e-06, + "loss": 0.06768798828125, + "step": 12712 + }, + { + "epoch": 0.8593348654860078, + "grad_norm": 1.2200044393539429, + "learning_rate": 1.5457537663082299e-06, + "loss": 0.193359375, + "step": 12713 + }, + { + "epoch": 0.859402460456942, + "grad_norm": 0.1755046397447586, + "learning_rate": 1.5442952622983825e-06, + "loss": 0.030612945556640625, + "step": 12714 + }, + { + "epoch": 0.8594700554278761, + "grad_norm": 0.9315910935401917, + "learning_rate": 1.5428374093592124e-06, + "loss": 0.1396331787109375, + "step": 12715 + }, + { + "epoch": 0.8595376503988104, + "grad_norm": 0.6627500653266907, + "learning_rate": 1.5413802075612615e-06, + "loss": 0.10762786865234375, + "step": 12716 + }, + { + "epoch": 0.8596052453697445, + "grad_norm": 0.4567340910434723, + "learning_rate": 1.5399236569750286e-06, + "loss": 0.0843048095703125, + "step": 12717 + }, + { + "epoch": 0.8596728403406787, + "grad_norm": 0.5078151226043701, + "learning_rate": 1.5384677576709987e-06, + "loss": 0.113861083984375, + "step": 12718 + }, + { + "epoch": 0.8597404353116128, + "grad_norm": 0.2902606427669525, + "learning_rate": 1.537012509719616e-06, + "loss": 0.05126953125, + "step": 12719 + }, + { + "epoch": 0.8598080302825469, + "grad_norm": 1.2714283466339111, + "learning_rate": 1.5355579131912934e-06, + "loss": 0.1570587158203125, + "step": 12720 + }, + { + "epoch": 0.8598756252534812, + "grad_norm": 0.3063284158706665, + "learning_rate": 1.5341039681564134e-06, + "loss": 0.05780029296875, + "step": 12721 + }, + { + "epoch": 0.8599432202244153, + "grad_norm": 0.692421555519104, + "learning_rate": 1.5326506746853265e-06, + "loss": 0.1339111328125, + "step": 12722 + }, + { + "epoch": 0.8600108151953495, + "grad_norm": 0.8239708542823792, + "learning_rate": 1.5311980328483494e-06, + "loss": 0.1395721435546875, + "step": 12723 + }, + { + "epoch": 0.8600784101662836, + "grad_norm": 0.40418949723243713, + "learning_rate": 1.5297460427157794e-06, + "loss": 0.060638427734375, + "step": 12724 + }, + { + "epoch": 0.8601460051372178, + "grad_norm": 1.0983433723449707, + "learning_rate": 1.5282947043578605e-06, + "loss": 0.2183837890625, + "step": 12725 + }, + { + "epoch": 0.860213600108152, + "grad_norm": 0.29597410559654236, + "learning_rate": 1.5268440178448263e-06, + "loss": 0.04183673858642578, + "step": 12726 + }, + { + "epoch": 0.8602811950790861, + "grad_norm": 0.42018190026283264, + "learning_rate": 1.5253939832468704e-06, + "loss": 0.049106597900390625, + "step": 12727 + }, + { + "epoch": 0.8603487900500203, + "grad_norm": 0.2825149595737457, + "learning_rate": 1.5239446006341455e-06, + "loss": 0.056064605712890625, + "step": 12728 + }, + { + "epoch": 0.8604163850209544, + "grad_norm": 0.279365211725235, + "learning_rate": 1.522495870076792e-06, + "loss": 0.03912353515625, + "step": 12729 + }, + { + "epoch": 0.8604839799918886, + "grad_norm": 0.34251633286476135, + "learning_rate": 1.5210477916449006e-06, + "loss": 0.049774169921875, + "step": 12730 + }, + { + "epoch": 0.8605515749628228, + "grad_norm": 1.1544116735458374, + "learning_rate": 1.5196003654085432e-06, + "loss": 0.182403564453125, + "step": 12731 + }, + { + "epoch": 0.860619169933757, + "grad_norm": 2.1946372985839844, + "learning_rate": 1.5181535914377543e-06, + "loss": 0.28363037109375, + "step": 12732 + }, + { + "epoch": 0.8606867649046911, + "grad_norm": 0.44384533166885376, + "learning_rate": 1.5167074698025357e-06, + "loss": 0.06029510498046875, + "step": 12733 + }, + { + "epoch": 0.8607543598756252, + "grad_norm": 0.6470673680305481, + "learning_rate": 1.5152620005728636e-06, + "loss": 0.12396240234375, + "step": 12734 + }, + { + "epoch": 0.8608219548465594, + "grad_norm": 0.33587703108787537, + "learning_rate": 1.513817183818675e-06, + "loss": 0.04291057586669922, + "step": 12735 + }, + { + "epoch": 0.8608895498174935, + "grad_norm": 1.0779969692230225, + "learning_rate": 1.512373019609879e-06, + "loss": 0.173858642578125, + "step": 12736 + }, + { + "epoch": 0.8609571447884278, + "grad_norm": 0.23476873338222504, + "learning_rate": 1.51092950801636e-06, + "loss": 0.03362274169921875, + "step": 12737 + }, + { + "epoch": 0.8610247397593619, + "grad_norm": 0.6236001253128052, + "learning_rate": 1.5094866491079534e-06, + "loss": 0.10871124267578125, + "step": 12738 + }, + { + "epoch": 0.8610923347302961, + "grad_norm": 0.8468221426010132, + "learning_rate": 1.5080444429544821e-06, + "loss": 0.09920692443847656, + "step": 12739 + }, + { + "epoch": 0.8611599297012302, + "grad_norm": 0.40993574261665344, + "learning_rate": 1.50660288962573e-06, + "loss": 0.06681060791015625, + "step": 12740 + }, + { + "epoch": 0.8612275246721643, + "grad_norm": 0.4125325083732605, + "learning_rate": 1.5051619891914382e-06, + "loss": 0.06836700439453125, + "step": 12741 + }, + { + "epoch": 0.8612951196430986, + "grad_norm": 1.4347259998321533, + "learning_rate": 1.5037217417213372e-06, + "loss": 0.12578964233398438, + "step": 12742 + }, + { + "epoch": 0.8613627146140327, + "grad_norm": 0.37413346767425537, + "learning_rate": 1.5022821472851068e-06, + "loss": 0.05062103271484375, + "step": 12743 + }, + { + "epoch": 0.8614303095849669, + "grad_norm": 0.2769859731197357, + "learning_rate": 1.5008432059524109e-06, + "loss": 0.04358673095703125, + "step": 12744 + }, + { + "epoch": 0.861497904555901, + "grad_norm": 1.5173624753952026, + "learning_rate": 1.4994049177928704e-06, + "loss": 0.200347900390625, + "step": 12745 + }, + { + "epoch": 0.8615654995268353, + "grad_norm": 0.6010432839393616, + "learning_rate": 1.4979672828760781e-06, + "loss": 0.11946868896484375, + "step": 12746 + }, + { + "epoch": 0.8616330944977694, + "grad_norm": 0.5206865668296814, + "learning_rate": 1.4965303012715987e-06, + "loss": 0.112518310546875, + "step": 12747 + }, + { + "epoch": 0.8617006894687035, + "grad_norm": 0.8327781558036804, + "learning_rate": 1.4950939730489593e-06, + "loss": 0.18035888671875, + "step": 12748 + }, + { + "epoch": 0.8617682844396377, + "grad_norm": 0.3766125440597534, + "learning_rate": 1.4936582982776565e-06, + "loss": 0.076690673828125, + "step": 12749 + }, + { + "epoch": 0.8618358794105718, + "grad_norm": 1.5010167360305786, + "learning_rate": 1.492223277027166e-06, + "loss": 0.14815902709960938, + "step": 12750 + }, + { + "epoch": 0.861903474381506, + "grad_norm": 0.7066514492034912, + "learning_rate": 1.4907889093669157e-06, + "loss": 0.11771392822265625, + "step": 12751 + }, + { + "epoch": 0.8619710693524402, + "grad_norm": 1.543251633644104, + "learning_rate": 1.489355195366307e-06, + "loss": 0.12787628173828125, + "step": 12752 + }, + { + "epoch": 0.8620386643233744, + "grad_norm": 0.9387001395225525, + "learning_rate": 1.4879221350947225e-06, + "loss": 0.11312484741210938, + "step": 12753 + }, + { + "epoch": 0.8621062592943085, + "grad_norm": 0.35820794105529785, + "learning_rate": 1.4864897286214885e-06, + "loss": 0.042911529541015625, + "step": 12754 + }, + { + "epoch": 0.8621738542652426, + "grad_norm": 0.7621116042137146, + "learning_rate": 1.4850579760159261e-06, + "loss": 0.10688400268554688, + "step": 12755 + }, + { + "epoch": 0.8622414492361768, + "grad_norm": 0.2946794033050537, + "learning_rate": 1.483626877347305e-06, + "loss": 0.043766021728515625, + "step": 12756 + }, + { + "epoch": 0.862309044207111, + "grad_norm": 0.5125455260276794, + "learning_rate": 1.4821964326848748e-06, + "loss": 0.1024627685546875, + "step": 12757 + }, + { + "epoch": 0.8623766391780452, + "grad_norm": 1.0965641736984253, + "learning_rate": 1.4807666420978467e-06, + "loss": 0.215118408203125, + "step": 12758 + }, + { + "epoch": 0.8624442341489793, + "grad_norm": 0.5828242897987366, + "learning_rate": 1.4793375056554038e-06, + "loss": 0.1255340576171875, + "step": 12759 + }, + { + "epoch": 0.8625118291199135, + "grad_norm": 1.5314253568649292, + "learning_rate": 1.4779090234266957e-06, + "loss": 0.2086334228515625, + "step": 12760 + }, + { + "epoch": 0.8625794240908476, + "grad_norm": 0.7164239287376404, + "learning_rate": 1.4764811954808422e-06, + "loss": 0.1366119384765625, + "step": 12761 + }, + { + "epoch": 0.8626470190617818, + "grad_norm": 0.5168100595474243, + "learning_rate": 1.4750540218869312e-06, + "loss": 0.08541107177734375, + "step": 12762 + }, + { + "epoch": 0.862714614032716, + "grad_norm": 0.5761257410049438, + "learning_rate": 1.4736275027140178e-06, + "loss": 0.1185302734375, + "step": 12763 + }, + { + "epoch": 0.8627822090036501, + "grad_norm": 0.7338597774505615, + "learning_rate": 1.4722016380311232e-06, + "loss": 0.11952972412109375, + "step": 12764 + }, + { + "epoch": 0.8628498039745843, + "grad_norm": 0.4014087915420532, + "learning_rate": 1.4707764279072406e-06, + "loss": 0.07306289672851562, + "step": 12765 + }, + { + "epoch": 0.8629173989455184, + "grad_norm": 0.7313563227653503, + "learning_rate": 1.469351872411338e-06, + "loss": 0.10225677490234375, + "step": 12766 + }, + { + "epoch": 0.8629849939164527, + "grad_norm": 0.5600096583366394, + "learning_rate": 1.4679279716123306e-06, + "loss": 0.09970855712890625, + "step": 12767 + }, + { + "epoch": 0.8630525888873868, + "grad_norm": 0.8845500349998474, + "learning_rate": 1.466504725579128e-06, + "loss": 0.197235107421875, + "step": 12768 + }, + { + "epoch": 0.8631201838583209, + "grad_norm": 0.233337864279747, + "learning_rate": 1.4650821343805888e-06, + "loss": 0.04128265380859375, + "step": 12769 + }, + { + "epoch": 0.8631877788292551, + "grad_norm": 0.6886225342750549, + "learning_rate": 1.4636601980855492e-06, + "loss": 0.1480712890625, + "step": 12770 + }, + { + "epoch": 0.8632553738001892, + "grad_norm": 1.1908955574035645, + "learning_rate": 1.4622389167628109e-06, + "loss": 0.19110107421875, + "step": 12771 + }, + { + "epoch": 0.8633229687711235, + "grad_norm": 0.6579012274742126, + "learning_rate": 1.460818290481144e-06, + "loss": 0.1429901123046875, + "step": 12772 + }, + { + "epoch": 0.8633905637420576, + "grad_norm": 0.31691551208496094, + "learning_rate": 1.4593983193092853e-06, + "loss": 0.06230926513671875, + "step": 12773 + }, + { + "epoch": 0.8634581587129918, + "grad_norm": 0.48827698826789856, + "learning_rate": 1.4579790033159497e-06, + "loss": 0.0722503662109375, + "step": 12774 + }, + { + "epoch": 0.8635257536839259, + "grad_norm": 0.5703166127204895, + "learning_rate": 1.4565603425698038e-06, + "loss": 0.0966949462890625, + "step": 12775 + }, + { + "epoch": 0.86359334865486, + "grad_norm": 0.3437548577785492, + "learning_rate": 1.4551423371394945e-06, + "loss": 0.0414581298828125, + "step": 12776 + }, + { + "epoch": 0.8636609436257943, + "grad_norm": 0.3201811909675598, + "learning_rate": 1.453724987093632e-06, + "loss": 0.062450408935546875, + "step": 12777 + }, + { + "epoch": 0.8637285385967284, + "grad_norm": 0.16483767330646515, + "learning_rate": 1.4523082925007964e-06, + "loss": 0.021404266357421875, + "step": 12778 + }, + { + "epoch": 0.8637961335676626, + "grad_norm": 0.4071117639541626, + "learning_rate": 1.4508922534295443e-06, + "loss": 0.07840728759765625, + "step": 12779 + }, + { + "epoch": 0.8638637285385967, + "grad_norm": 1.2265679836273193, + "learning_rate": 1.449476869948378e-06, + "loss": 0.22314453125, + "step": 12780 + }, + { + "epoch": 0.863931323509531, + "grad_norm": 0.5293611288070679, + "learning_rate": 1.4480621421257957e-06, + "loss": 0.09835052490234375, + "step": 12781 + }, + { + "epoch": 0.8639989184804651, + "grad_norm": 0.7461565136909485, + "learning_rate": 1.4466480700302443e-06, + "loss": 0.1568450927734375, + "step": 12782 + }, + { + "epoch": 0.8640665134513992, + "grad_norm": 0.7668120265007019, + "learning_rate": 1.4452346537301475e-06, + "loss": 0.12796783447265625, + "step": 12783 + }, + { + "epoch": 0.8641341084223334, + "grad_norm": 0.6653928160667419, + "learning_rate": 1.4438218932938924e-06, + "loss": 0.113555908203125, + "step": 12784 + }, + { + "epoch": 0.8642017033932675, + "grad_norm": 0.6691594123840332, + "learning_rate": 1.4424097887898407e-06, + "loss": 0.14379119873046875, + "step": 12785 + }, + { + "epoch": 0.8642692983642017, + "grad_norm": 0.9352470636367798, + "learning_rate": 1.440998340286318e-06, + "loss": 0.1388702392578125, + "step": 12786 + }, + { + "epoch": 0.8643368933351359, + "grad_norm": 0.9371748566627502, + "learning_rate": 1.4395875478516162e-06, + "loss": 0.1904754638671875, + "step": 12787 + }, + { + "epoch": 0.86440448830607, + "grad_norm": 0.7146828770637512, + "learning_rate": 1.4381774115540008e-06, + "loss": 0.137664794921875, + "step": 12788 + }, + { + "epoch": 0.8644720832770042, + "grad_norm": 0.4244794547557831, + "learning_rate": 1.4367679314617022e-06, + "loss": 0.0811614990234375, + "step": 12789 + }, + { + "epoch": 0.8645396782479383, + "grad_norm": 0.7224961519241333, + "learning_rate": 1.4353591076429207e-06, + "loss": 0.13250732421875, + "step": 12790 + }, + { + "epoch": 0.8646072732188725, + "grad_norm": 0.2651195526123047, + "learning_rate": 1.4339509401658201e-06, + "loss": 0.03747367858886719, + "step": 12791 + }, + { + "epoch": 0.8646748681898067, + "grad_norm": 0.5722193717956543, + "learning_rate": 1.4325434290985446e-06, + "loss": 0.07352828979492188, + "step": 12792 + }, + { + "epoch": 0.8647424631607409, + "grad_norm": 1.1189050674438477, + "learning_rate": 1.4311365745091876e-06, + "loss": 0.15155029296875, + "step": 12793 + }, + { + "epoch": 0.864810058131675, + "grad_norm": 1.209017038345337, + "learning_rate": 1.42973037646583e-06, + "loss": 0.153564453125, + "step": 12794 + }, + { + "epoch": 0.8648776531026091, + "grad_norm": 0.34676554799079895, + "learning_rate": 1.4283248350365085e-06, + "loss": 0.05220222473144531, + "step": 12795 + }, + { + "epoch": 0.8649452480735433, + "grad_norm": 0.32166588306427, + "learning_rate": 1.4269199502892344e-06, + "loss": 0.062591552734375, + "step": 12796 + }, + { + "epoch": 0.8650128430444775, + "grad_norm": 1.2971735000610352, + "learning_rate": 1.4255157222919812e-06, + "loss": 0.12784576416015625, + "step": 12797 + }, + { + "epoch": 0.8650804380154117, + "grad_norm": 1.0823103189468384, + "learning_rate": 1.4241121511126981e-06, + "loss": 0.187713623046875, + "step": 12798 + }, + { + "epoch": 0.8651480329863458, + "grad_norm": 0.20370714366436005, + "learning_rate": 1.4227092368192956e-06, + "loss": 0.03295707702636719, + "step": 12799 + }, + { + "epoch": 0.86521562795728, + "grad_norm": 0.7351903319358826, + "learning_rate": 1.4213069794796562e-06, + "loss": 0.138946533203125, + "step": 12800 + }, + { + "epoch": 0.8652832229282141, + "grad_norm": 0.48029327392578125, + "learning_rate": 1.4199053791616307e-06, + "loss": 0.0799102783203125, + "step": 12801 + }, + { + "epoch": 0.8653508178991483, + "grad_norm": 0.5223603844642639, + "learning_rate": 1.4185044359330347e-06, + "loss": 0.09643936157226562, + "step": 12802 + }, + { + "epoch": 0.8654184128700825, + "grad_norm": 0.7212107181549072, + "learning_rate": 1.4171041498616604e-06, + "loss": 0.13018798828125, + "step": 12803 + }, + { + "epoch": 0.8654860078410166, + "grad_norm": 1.4639978408813477, + "learning_rate": 1.4157045210152543e-06, + "loss": 0.17908096313476562, + "step": 12804 + }, + { + "epoch": 0.8655536028119508, + "grad_norm": 0.7825180292129517, + "learning_rate": 1.4143055494615481e-06, + "loss": 0.1109771728515625, + "step": 12805 + }, + { + "epoch": 0.8656211977828849, + "grad_norm": 0.3733905851840973, + "learning_rate": 1.4129072352682216e-06, + "loss": 0.05185699462890625, + "step": 12806 + }, + { + "epoch": 0.8656887927538192, + "grad_norm": 0.9355771541595459, + "learning_rate": 1.4115095785029453e-06, + "loss": 0.1751708984375, + "step": 12807 + }, + { + "epoch": 0.8657563877247533, + "grad_norm": 0.6835345029830933, + "learning_rate": 1.4101125792333386e-06, + "loss": 0.1570892333984375, + "step": 12808 + }, + { + "epoch": 0.8658239826956874, + "grad_norm": 1.6613264083862305, + "learning_rate": 1.4087162375270024e-06, + "loss": 0.192169189453125, + "step": 12809 + }, + { + "epoch": 0.8658915776666216, + "grad_norm": 0.6265497803688049, + "learning_rate": 1.4073205534514978e-06, + "loss": 0.1192779541015625, + "step": 12810 + }, + { + "epoch": 0.8659591726375557, + "grad_norm": 1.2242372035980225, + "learning_rate": 1.4059255270743554e-06, + "loss": 0.209869384765625, + "step": 12811 + }, + { + "epoch": 0.86602676760849, + "grad_norm": 3.2851333618164062, + "learning_rate": 1.4045311584630765e-06, + "loss": 0.14072799682617188, + "step": 12812 + }, + { + "epoch": 0.8660943625794241, + "grad_norm": 0.7263956069946289, + "learning_rate": 1.4031374476851306e-06, + "loss": 0.09123992919921875, + "step": 12813 + }, + { + "epoch": 0.8661619575503583, + "grad_norm": 1.2261046171188354, + "learning_rate": 1.4017443948079533e-06, + "loss": 0.225738525390625, + "step": 12814 + }, + { + "epoch": 0.8662295525212924, + "grad_norm": 0.4347233474254608, + "learning_rate": 1.4003519998989444e-06, + "loss": 0.06778717041015625, + "step": 12815 + }, + { + "epoch": 0.8662971474922265, + "grad_norm": 0.3102060556411743, + "learning_rate": 1.3989602630254883e-06, + "loss": 0.04799652099609375, + "step": 12816 + }, + { + "epoch": 0.8663647424631608, + "grad_norm": 0.35673871636390686, + "learning_rate": 1.397569184254911e-06, + "loss": 0.06539154052734375, + "step": 12817 + }, + { + "epoch": 0.8664323374340949, + "grad_norm": 0.3510410189628601, + "learning_rate": 1.3961787636545337e-06, + "loss": 0.07598876953125, + "step": 12818 + }, + { + "epoch": 0.8664999324050291, + "grad_norm": 0.5926507711410522, + "learning_rate": 1.3947890012916292e-06, + "loss": 0.1241455078125, + "step": 12819 + }, + { + "epoch": 0.8665675273759632, + "grad_norm": 0.5960967540740967, + "learning_rate": 1.393399897233442e-06, + "loss": 0.08974456787109375, + "step": 12820 + }, + { + "epoch": 0.8666351223468974, + "grad_norm": 0.3228558599948883, + "learning_rate": 1.3920114515471887e-06, + "loss": 0.03946113586425781, + "step": 12821 + }, + { + "epoch": 0.8667027173178316, + "grad_norm": 0.7929567098617554, + "learning_rate": 1.3906236643000436e-06, + "loss": 0.15713119506835938, + "step": 12822 + }, + { + "epoch": 0.8667703122887657, + "grad_norm": 0.3867660164833069, + "learning_rate": 1.3892365355591646e-06, + "loss": 0.06082916259765625, + "step": 12823 + }, + { + "epoch": 0.8668379072596999, + "grad_norm": 0.8345584869384766, + "learning_rate": 1.3878500653916664e-06, + "loss": 0.13709259033203125, + "step": 12824 + }, + { + "epoch": 0.866905502230634, + "grad_norm": 1.4110064506530762, + "learning_rate": 1.3864642538646339e-06, + "loss": 0.171356201171875, + "step": 12825 + }, + { + "epoch": 0.8669730972015682, + "grad_norm": 0.534294605255127, + "learning_rate": 1.3850791010451231e-06, + "loss": 0.10110664367675781, + "step": 12826 + }, + { + "epoch": 0.8670406921725023, + "grad_norm": 0.27529221773147583, + "learning_rate": 1.3836946070001554e-06, + "loss": 0.046581268310546875, + "step": 12827 + }, + { + "epoch": 0.8671082871434366, + "grad_norm": 0.39913812279701233, + "learning_rate": 1.3823107717967193e-06, + "loss": 0.08502197265625, + "step": 12828 + }, + { + "epoch": 0.8671758821143707, + "grad_norm": 0.2140267789363861, + "learning_rate": 1.3809275955017809e-06, + "loss": 0.03035736083984375, + "step": 12829 + }, + { + "epoch": 0.8672434770853048, + "grad_norm": 0.5451173186302185, + "learning_rate": 1.3795450781822566e-06, + "loss": 0.12188720703125, + "step": 12830 + }, + { + "epoch": 0.867311072056239, + "grad_norm": 1.0436103343963623, + "learning_rate": 1.3781632199050482e-06, + "loss": 0.1591796875, + "step": 12831 + }, + { + "epoch": 0.8673786670271731, + "grad_norm": 1.4603954553604126, + "learning_rate": 1.3767820207370202e-06, + "loss": 0.19716644287109375, + "step": 12832 + }, + { + "epoch": 0.8674462619981074, + "grad_norm": 0.2398921698331833, + "learning_rate": 1.3754014807449927e-06, + "loss": 0.033351898193359375, + "step": 12833 + }, + { + "epoch": 0.8675138569690415, + "grad_norm": 0.6092783808708191, + "learning_rate": 1.374021599995779e-06, + "loss": 0.1312713623046875, + "step": 12834 + }, + { + "epoch": 0.8675814519399757, + "grad_norm": 1.3055469989776611, + "learning_rate": 1.372642378556132e-06, + "loss": 0.1311798095703125, + "step": 12835 + }, + { + "epoch": 0.8676490469109098, + "grad_norm": 0.6986052393913269, + "learning_rate": 1.3712638164927988e-06, + "loss": 0.11241912841796875, + "step": 12836 + }, + { + "epoch": 0.8677166418818439, + "grad_norm": 0.3549569547176361, + "learning_rate": 1.369885913872479e-06, + "loss": 0.07186508178710938, + "step": 12837 + }, + { + "epoch": 0.8677842368527782, + "grad_norm": 1.037889003753662, + "learning_rate": 1.3685086707618411e-06, + "loss": 0.151123046875, + "step": 12838 + }, + { + "epoch": 0.8678518318237123, + "grad_norm": 0.5896769165992737, + "learning_rate": 1.3671320872275283e-06, + "loss": 0.10876846313476562, + "step": 12839 + }, + { + "epoch": 0.8679194267946465, + "grad_norm": 0.6570382118225098, + "learning_rate": 1.3657561633361476e-06, + "loss": 0.1340179443359375, + "step": 12840 + }, + { + "epoch": 0.8679870217655806, + "grad_norm": 2.2949109077453613, + "learning_rate": 1.3643808991542706e-06, + "loss": 0.23065185546875, + "step": 12841 + }, + { + "epoch": 0.8680546167365149, + "grad_norm": 0.8379049897193909, + "learning_rate": 1.3630062947484507e-06, + "loss": 0.1068267822265625, + "step": 12842 + }, + { + "epoch": 0.868122211707449, + "grad_norm": 0.8141569495201111, + "learning_rate": 1.361632350185188e-06, + "loss": 0.110382080078125, + "step": 12843 + }, + { + "epoch": 0.8681898066783831, + "grad_norm": 0.802521824836731, + "learning_rate": 1.360259065530971e-06, + "loss": 0.0759124755859375, + "step": 12844 + }, + { + "epoch": 0.8682574016493173, + "grad_norm": 0.31929364800453186, + "learning_rate": 1.3588864408522483e-06, + "loss": 0.05542182922363281, + "step": 12845 + }, + { + "epoch": 0.8683249966202514, + "grad_norm": 0.8550617694854736, + "learning_rate": 1.3575144762154269e-06, + "loss": 0.12038040161132812, + "step": 12846 + }, + { + "epoch": 0.8683925915911856, + "grad_norm": 0.21320179104804993, + "learning_rate": 1.3561431716869e-06, + "loss": 0.041439056396484375, + "step": 12847 + }, + { + "epoch": 0.8684601865621198, + "grad_norm": 0.7965275049209595, + "learning_rate": 1.3547725273330164e-06, + "loss": 0.14007568359375, + "step": 12848 + }, + { + "epoch": 0.868527781533054, + "grad_norm": 0.6930543184280396, + "learning_rate": 1.3534025432200965e-06, + "loss": 0.1372222900390625, + "step": 12849 + }, + { + "epoch": 0.8685953765039881, + "grad_norm": 1.3602882623672485, + "learning_rate": 1.3520332194144285e-06, + "loss": 0.2512054443359375, + "step": 12850 + }, + { + "epoch": 0.8686629714749222, + "grad_norm": 0.3230597972869873, + "learning_rate": 1.35066455598227e-06, + "loss": 0.04494476318359375, + "step": 12851 + }, + { + "epoch": 0.8687305664458564, + "grad_norm": 0.6753949522972107, + "learning_rate": 1.3492965529898443e-06, + "loss": 0.1374053955078125, + "step": 12852 + }, + { + "epoch": 0.8687981614167906, + "grad_norm": 0.6406221389770508, + "learning_rate": 1.3479292105033435e-06, + "loss": 0.13494873046875, + "step": 12853 + }, + { + "epoch": 0.8688657563877248, + "grad_norm": 0.8668636083602905, + "learning_rate": 1.3465625285889265e-06, + "loss": 0.150970458984375, + "step": 12854 + }, + { + "epoch": 0.8689333513586589, + "grad_norm": 0.4555406868457794, + "learning_rate": 1.3451965073127303e-06, + "loss": 0.083984375, + "step": 12855 + }, + { + "epoch": 0.8690009463295931, + "grad_norm": 0.631332516670227, + "learning_rate": 1.343831146740842e-06, + "loss": 0.11194610595703125, + "step": 12856 + }, + { + "epoch": 0.8690685413005272, + "grad_norm": 0.9946329593658447, + "learning_rate": 1.3424664469393272e-06, + "loss": 0.1700439453125, + "step": 12857 + }, + { + "epoch": 0.8691361362714614, + "grad_norm": 0.5840315818786621, + "learning_rate": 1.3411024079742263e-06, + "loss": 0.0992889404296875, + "step": 12858 + }, + { + "epoch": 0.8692037312423956, + "grad_norm": 0.25929775834083557, + "learning_rate": 1.339739029911528e-06, + "loss": 0.05352592468261719, + "step": 12859 + }, + { + "epoch": 0.8692713262133297, + "grad_norm": 1.0958644151687622, + "learning_rate": 1.3383763128172117e-06, + "loss": 0.1766357421875, + "step": 12860 + }, + { + "epoch": 0.8693389211842639, + "grad_norm": 0.963782548904419, + "learning_rate": 1.3370142567572107e-06, + "loss": 0.14038658142089844, + "step": 12861 + }, + { + "epoch": 0.869406516155198, + "grad_norm": 1.3398854732513428, + "learning_rate": 1.335652861797428e-06, + "loss": 0.152740478515625, + "step": 12862 + }, + { + "epoch": 0.8694741111261323, + "grad_norm": 0.8745526671409607, + "learning_rate": 1.3342921280037385e-06, + "loss": 0.16485595703125, + "step": 12863 + }, + { + "epoch": 0.8695417060970664, + "grad_norm": 0.7641265988349915, + "learning_rate": 1.3329320554419817e-06, + "loss": 0.10036468505859375, + "step": 12864 + }, + { + "epoch": 0.8696093010680005, + "grad_norm": 1.0490851402282715, + "learning_rate": 1.331572644177963e-06, + "loss": 0.1328582763671875, + "step": 12865 + }, + { + "epoch": 0.8696768960389347, + "grad_norm": 0.35968542098999023, + "learning_rate": 1.3302138942774683e-06, + "loss": 0.0532379150390625, + "step": 12866 + }, + { + "epoch": 0.8697444910098688, + "grad_norm": 0.6136007905006409, + "learning_rate": 1.328855805806235e-06, + "loss": 0.09721565246582031, + "step": 12867 + }, + { + "epoch": 0.8698120859808031, + "grad_norm": 1.3806235790252686, + "learning_rate": 1.3274983788299771e-06, + "loss": 0.20758056640625, + "step": 12868 + }, + { + "epoch": 0.8698796809517372, + "grad_norm": 0.5250229239463806, + "learning_rate": 1.3261416134143772e-06, + "loss": 0.127288818359375, + "step": 12869 + }, + { + "epoch": 0.8699472759226714, + "grad_norm": 0.5216444730758667, + "learning_rate": 1.3247855096250795e-06, + "loss": 0.09856414794921875, + "step": 12870 + }, + { + "epoch": 0.8700148708936055, + "grad_norm": 0.3950931131839752, + "learning_rate": 1.3234300675277094e-06, + "loss": 0.08769989013671875, + "step": 12871 + }, + { + "epoch": 0.8700824658645396, + "grad_norm": 0.4536764323711395, + "learning_rate": 1.32207528718784e-06, + "loss": 0.08193206787109375, + "step": 12872 + }, + { + "epoch": 0.8701500608354739, + "grad_norm": 0.9458655714988708, + "learning_rate": 1.3207211686710352e-06, + "loss": 0.131683349609375, + "step": 12873 + }, + { + "epoch": 0.870217655806408, + "grad_norm": 1.121226191520691, + "learning_rate": 1.3193677120428095e-06, + "loss": 0.12445068359375, + "step": 12874 + }, + { + "epoch": 0.8702852507773422, + "grad_norm": 0.5595988631248474, + "learning_rate": 1.3180149173686517e-06, + "loss": 0.08531951904296875, + "step": 12875 + }, + { + "epoch": 0.8703528457482763, + "grad_norm": 0.6343626976013184, + "learning_rate": 1.3166627847140216e-06, + "loss": 0.09865188598632812, + "step": 12876 + }, + { + "epoch": 0.8704204407192105, + "grad_norm": 0.4952865242958069, + "learning_rate": 1.3153113141443402e-06, + "loss": 0.09055328369140625, + "step": 12877 + }, + { + "epoch": 0.8704880356901447, + "grad_norm": 0.6942479610443115, + "learning_rate": 1.3139605057249999e-06, + "loss": 0.09672164916992188, + "step": 12878 + }, + { + "epoch": 0.8705556306610788, + "grad_norm": 1.1331756114959717, + "learning_rate": 1.3126103595213667e-06, + "loss": 0.14061737060546875, + "step": 12879 + }, + { + "epoch": 0.870623225632013, + "grad_norm": 0.6226947903633118, + "learning_rate": 1.3112608755987637e-06, + "loss": 0.11718940734863281, + "step": 12880 + }, + { + "epoch": 0.8706908206029471, + "grad_norm": 0.3366332948207855, + "learning_rate": 1.30991205402249e-06, + "loss": 0.03919219970703125, + "step": 12881 + }, + { + "epoch": 0.8707584155738813, + "grad_norm": 0.625028669834137, + "learning_rate": 1.3085638948578072e-06, + "loss": 0.05718994140625, + "step": 12882 + }, + { + "epoch": 0.8708260105448155, + "grad_norm": 0.37792447209358215, + "learning_rate": 1.3072163981699476e-06, + "loss": 0.0373687744140625, + "step": 12883 + }, + { + "epoch": 0.8708936055157497, + "grad_norm": 0.35251256823539734, + "learning_rate": 1.3058695640241176e-06, + "loss": 0.048519134521484375, + "step": 12884 + }, + { + "epoch": 0.8709612004866838, + "grad_norm": 0.3635754883289337, + "learning_rate": 1.3045233924854766e-06, + "loss": 0.07080841064453125, + "step": 12885 + }, + { + "epoch": 0.8710287954576179, + "grad_norm": 0.718245267868042, + "learning_rate": 1.3031778836191677e-06, + "loss": 0.10973739624023438, + "step": 12886 + }, + { + "epoch": 0.8710963904285521, + "grad_norm": 1.1597990989685059, + "learning_rate": 1.301833037490292e-06, + "loss": 0.13231658935546875, + "step": 12887 + }, + { + "epoch": 0.8711639853994863, + "grad_norm": 0.4967052638530731, + "learning_rate": 1.3004888541639209e-06, + "loss": 0.0899658203125, + "step": 12888 + }, + { + "epoch": 0.8712315803704205, + "grad_norm": 0.8980648517608643, + "learning_rate": 1.2991453337050973e-06, + "loss": 0.115966796875, + "step": 12889 + }, + { + "epoch": 0.8712991753413546, + "grad_norm": 0.7849092483520508, + "learning_rate": 1.297802476178826e-06, + "loss": 0.11788558959960938, + "step": 12890 + }, + { + "epoch": 0.8713667703122888, + "grad_norm": 0.2751769721508026, + "learning_rate": 1.2964602816500849e-06, + "loss": 0.0516357421875, + "step": 12891 + }, + { + "epoch": 0.8714343652832229, + "grad_norm": 0.5802496075630188, + "learning_rate": 1.2951187501838152e-06, + "loss": 0.088775634765625, + "step": 12892 + }, + { + "epoch": 0.871501960254157, + "grad_norm": 1.035967469215393, + "learning_rate": 1.293777881844932e-06, + "loss": 0.180755615234375, + "step": 12893 + }, + { + "epoch": 0.8715695552250913, + "grad_norm": 1.195618987083435, + "learning_rate": 1.2924376766983081e-06, + "loss": 0.2069091796875, + "step": 12894 + }, + { + "epoch": 0.8716371501960254, + "grad_norm": 0.8062930107116699, + "learning_rate": 1.2910981348088036e-06, + "loss": 0.121917724609375, + "step": 12895 + }, + { + "epoch": 0.8717047451669596, + "grad_norm": 0.47340384125709534, + "learning_rate": 1.2897592562412197e-06, + "loss": 0.07968902587890625, + "step": 12896 + }, + { + "epoch": 0.8717723401378937, + "grad_norm": 0.41777780652046204, + "learning_rate": 1.2884210410603513e-06, + "loss": 0.0687408447265625, + "step": 12897 + }, + { + "epoch": 0.871839935108828, + "grad_norm": 0.9183326959609985, + "learning_rate": 1.2870834893309386e-06, + "loss": 0.20086669921875, + "step": 12898 + }, + { + "epoch": 0.8719075300797621, + "grad_norm": 0.5500118732452393, + "learning_rate": 1.2857466011177094e-06, + "loss": 0.08881378173828125, + "step": 12899 + }, + { + "epoch": 0.8719751250506962, + "grad_norm": 0.2877245843410492, + "learning_rate": 1.284410376485347e-06, + "loss": 0.060760498046875, + "step": 12900 + }, + { + "epoch": 0.8720427200216304, + "grad_norm": 0.8831292986869812, + "learning_rate": 1.2830748154985084e-06, + "loss": 0.14232254028320312, + "step": 12901 + }, + { + "epoch": 0.8721103149925645, + "grad_norm": 1.340734601020813, + "learning_rate": 1.281739918221813e-06, + "loss": 0.1376800537109375, + "step": 12902 + }, + { + "epoch": 0.8721779099634988, + "grad_norm": 0.26588189601898193, + "learning_rate": 1.2804056847198547e-06, + "loss": 0.037967681884765625, + "step": 12903 + }, + { + "epoch": 0.8722455049344329, + "grad_norm": 0.3162368834018707, + "learning_rate": 1.2790721150571899e-06, + "loss": 0.044826507568359375, + "step": 12904 + }, + { + "epoch": 0.8723130999053671, + "grad_norm": 0.26485884189605713, + "learning_rate": 1.277739209298347e-06, + "loss": 0.034694671630859375, + "step": 12905 + }, + { + "epoch": 0.8723806948763012, + "grad_norm": 0.40395769476890564, + "learning_rate": 1.2764069675078178e-06, + "loss": 0.06610107421875, + "step": 12906 + }, + { + "epoch": 0.8724482898472353, + "grad_norm": 0.9056178331375122, + "learning_rate": 1.2750753897500638e-06, + "loss": 0.12520217895507812, + "step": 12907 + }, + { + "epoch": 0.8725158848181696, + "grad_norm": 0.4211197793483734, + "learning_rate": 1.2737444760895205e-06, + "loss": 0.1163482666015625, + "step": 12908 + }, + { + "epoch": 0.8725834797891037, + "grad_norm": 0.34327229857444763, + "learning_rate": 1.272414226590578e-06, + "loss": 0.047760009765625, + "step": 12909 + }, + { + "epoch": 0.8726510747600379, + "grad_norm": 0.47392451763153076, + "learning_rate": 1.2710846413176114e-06, + "loss": 0.0926055908203125, + "step": 12910 + }, + { + "epoch": 0.872718669730972, + "grad_norm": 0.6766212582588196, + "learning_rate": 1.2697557203349441e-06, + "loss": 0.11208343505859375, + "step": 12911 + }, + { + "epoch": 0.8727862647019062, + "grad_norm": 0.7442328333854675, + "learning_rate": 1.268427463706885e-06, + "loss": 0.124908447265625, + "step": 12912 + }, + { + "epoch": 0.8728538596728403, + "grad_norm": 0.6906651854515076, + "learning_rate": 1.2670998714977006e-06, + "loss": 0.1290283203125, + "step": 12913 + }, + { + "epoch": 0.8729214546437745, + "grad_norm": 0.2707548439502716, + "learning_rate": 1.2657729437716282e-06, + "loss": 0.046291351318359375, + "step": 12914 + }, + { + "epoch": 0.8729890496147087, + "grad_norm": 1.1254602670669556, + "learning_rate": 1.264446680592873e-06, + "loss": 0.19403076171875, + "step": 12915 + }, + { + "epoch": 0.8730566445856428, + "grad_norm": 0.41513529419898987, + "learning_rate": 1.2631210820256084e-06, + "loss": 0.1045379638671875, + "step": 12916 + }, + { + "epoch": 0.873124239556577, + "grad_norm": 0.32648858428001404, + "learning_rate": 1.2617961481339751e-06, + "loss": 0.0504913330078125, + "step": 12917 + }, + { + "epoch": 0.8731918345275111, + "grad_norm": 0.819396436214447, + "learning_rate": 1.2604718789820802e-06, + "loss": 0.1578216552734375, + "step": 12918 + }, + { + "epoch": 0.8732594294984453, + "grad_norm": 0.20550498366355896, + "learning_rate": 1.2591482746340005e-06, + "loss": 0.03527069091796875, + "step": 12919 + }, + { + "epoch": 0.8733270244693795, + "grad_norm": 0.5161107182502747, + "learning_rate": 1.25782533515378e-06, + "loss": 0.09606361389160156, + "step": 12920 + }, + { + "epoch": 0.8733946194403136, + "grad_norm": 0.8042795062065125, + "learning_rate": 1.2565030606054357e-06, + "loss": 0.1181488037109375, + "step": 12921 + }, + { + "epoch": 0.8734622144112478, + "grad_norm": 0.5127747058868408, + "learning_rate": 1.2551814510529364e-06, + "loss": 0.07906723022460938, + "step": 12922 + }, + { + "epoch": 0.8735298093821819, + "grad_norm": 0.4807754456996918, + "learning_rate": 1.253860506560241e-06, + "loss": 0.0929718017578125, + "step": 12923 + }, + { + "epoch": 0.8735974043531162, + "grad_norm": 0.6183529496192932, + "learning_rate": 1.25254022719126e-06, + "loss": 0.07279205322265625, + "step": 12924 + }, + { + "epoch": 0.8736649993240503, + "grad_norm": 1.6795430183410645, + "learning_rate": 1.2512206130098774e-06, + "loss": 0.22540283203125, + "step": 12925 + }, + { + "epoch": 0.8737325942949844, + "grad_norm": 0.39054617285728455, + "learning_rate": 1.2499016640799487e-06, + "loss": 0.05899810791015625, + "step": 12926 + }, + { + "epoch": 0.8738001892659186, + "grad_norm": 0.9657678604125977, + "learning_rate": 1.2485833804652814e-06, + "loss": 0.1405792236328125, + "step": 12927 + }, + { + "epoch": 0.8738677842368527, + "grad_norm": 0.4080584645271301, + "learning_rate": 1.2472657622296724e-06, + "loss": 0.0655975341796875, + "step": 12928 + }, + { + "epoch": 0.873935379207787, + "grad_norm": 0.8236615061759949, + "learning_rate": 1.2459488094368726e-06, + "loss": 0.17285919189453125, + "step": 12929 + }, + { + "epoch": 0.8740029741787211, + "grad_norm": 0.24137496948242188, + "learning_rate": 1.2446325221506045e-06, + "loss": 0.035907745361328125, + "step": 12930 + }, + { + "epoch": 0.8740705691496553, + "grad_norm": 0.7316229343414307, + "learning_rate": 1.2433169004345602e-06, + "loss": 0.1045074462890625, + "step": 12931 + }, + { + "epoch": 0.8741381641205894, + "grad_norm": 0.8532482981681824, + "learning_rate": 1.2420019443523939e-06, + "loss": 0.11507225036621094, + "step": 12932 + }, + { + "epoch": 0.8742057590915235, + "grad_norm": 0.6357151865959167, + "learning_rate": 1.240687653967733e-06, + "loss": 0.10189056396484375, + "step": 12933 + }, + { + "epoch": 0.8742733540624578, + "grad_norm": 0.41611218452453613, + "learning_rate": 1.2393740293441769e-06, + "loss": 0.092498779296875, + "step": 12934 + }, + { + "epoch": 0.8743409490333919, + "grad_norm": 0.3069489002227783, + "learning_rate": 1.238061070545276e-06, + "loss": 0.048858642578125, + "step": 12935 + }, + { + "epoch": 0.8744085440043261, + "grad_norm": 0.7282941341400146, + "learning_rate": 1.2367487776345666e-06, + "loss": 0.095489501953125, + "step": 12936 + }, + { + "epoch": 0.8744761389752602, + "grad_norm": 0.4234370291233063, + "learning_rate": 1.2354371506755475e-06, + "loss": 0.0882415771484375, + "step": 12937 + }, + { + "epoch": 0.8745437339461944, + "grad_norm": 0.842755138874054, + "learning_rate": 1.2341261897316747e-06, + "loss": 0.1334228515625, + "step": 12938 + }, + { + "epoch": 0.8746113289171286, + "grad_norm": 0.6246930956840515, + "learning_rate": 1.2328158948663893e-06, + "loss": 0.1084442138671875, + "step": 12939 + }, + { + "epoch": 0.8746789238880627, + "grad_norm": 0.44626015424728394, + "learning_rate": 1.231506266143087e-06, + "loss": 0.0918121337890625, + "step": 12940 + }, + { + "epoch": 0.8747465188589969, + "grad_norm": 0.36427998542785645, + "learning_rate": 1.2301973036251374e-06, + "loss": 0.05870819091796875, + "step": 12941 + }, + { + "epoch": 0.874814113829931, + "grad_norm": 0.1813458800315857, + "learning_rate": 1.2288890073758762e-06, + "loss": 0.0173492431640625, + "step": 12942 + }, + { + "epoch": 0.8748817088008652, + "grad_norm": 0.4808250665664673, + "learning_rate": 1.2275813774586065e-06, + "loss": 0.091644287109375, + "step": 12943 + }, + { + "epoch": 0.8749493037717994, + "grad_norm": 0.8286890983581543, + "learning_rate": 1.226274413936599e-06, + "loss": 0.18145751953125, + "step": 12944 + }, + { + "epoch": 0.8750168987427336, + "grad_norm": 0.8298854827880859, + "learning_rate": 1.224968116873093e-06, + "loss": 0.11135101318359375, + "step": 12945 + }, + { + "epoch": 0.8750844937136677, + "grad_norm": 0.3399115800857544, + "learning_rate": 1.2236624863312933e-06, + "loss": 0.060146331787109375, + "step": 12946 + }, + { + "epoch": 0.8751520886846018, + "grad_norm": 2.079782247543335, + "learning_rate": 1.222357522374381e-06, + "loss": 0.1803741455078125, + "step": 12947 + }, + { + "epoch": 0.875219683655536, + "grad_norm": 0.8633091449737549, + "learning_rate": 1.2210532250654889e-06, + "loss": 0.1927490234375, + "step": 12948 + }, + { + "epoch": 0.8752872786264702, + "grad_norm": 0.2846107482910156, + "learning_rate": 1.219749594467736e-06, + "loss": 0.044193267822265625, + "step": 12949 + }, + { + "epoch": 0.8753548735974044, + "grad_norm": 0.8375133872032166, + "learning_rate": 1.2184466306441978e-06, + "loss": 0.11598587036132812, + "step": 12950 + }, + { + "epoch": 0.8754224685683385, + "grad_norm": 1.0778722763061523, + "learning_rate": 1.217144333657913e-06, + "loss": 0.12856292724609375, + "step": 12951 + }, + { + "epoch": 0.8754900635392727, + "grad_norm": 0.8557659983634949, + "learning_rate": 1.2158427035719034e-06, + "loss": 0.156707763671875, + "step": 12952 + }, + { + "epoch": 0.8755576585102068, + "grad_norm": 0.2357570230960846, + "learning_rate": 1.214541740449145e-06, + "loss": 0.034880638122558594, + "step": 12953 + }, + { + "epoch": 0.875625253481141, + "grad_norm": 0.4242369532585144, + "learning_rate": 1.2132414443525892e-06, + "loss": 0.0572052001953125, + "step": 12954 + }, + { + "epoch": 0.8756928484520752, + "grad_norm": 0.6596667766571045, + "learning_rate": 1.2119418153451523e-06, + "loss": 0.12249755859375, + "step": 12955 + }, + { + "epoch": 0.8757604434230093, + "grad_norm": 0.8786641359329224, + "learning_rate": 1.2106428534897157e-06, + "loss": 0.14801025390625, + "step": 12956 + }, + { + "epoch": 0.8758280383939435, + "grad_norm": 0.3840169310569763, + "learning_rate": 1.2093445588491326e-06, + "loss": 0.05658531188964844, + "step": 12957 + }, + { + "epoch": 0.8758956333648776, + "grad_norm": 0.7186898589134216, + "learning_rate": 1.2080469314862242e-06, + "loss": 0.09967422485351562, + "step": 12958 + }, + { + "epoch": 0.8759632283358119, + "grad_norm": 1.1234970092773438, + "learning_rate": 1.2067499714637735e-06, + "loss": 0.1498260498046875, + "step": 12959 + }, + { + "epoch": 0.876030823306746, + "grad_norm": 0.4057205021381378, + "learning_rate": 1.2054536788445425e-06, + "loss": 0.07720947265625, + "step": 12960 + }, + { + "epoch": 0.8760984182776801, + "grad_norm": 0.36404141783714294, + "learning_rate": 1.2041580536912488e-06, + "loss": 0.068206787109375, + "step": 12961 + }, + { + "epoch": 0.8761660132486143, + "grad_norm": 1.2559480667114258, + "learning_rate": 1.202863096066581e-06, + "loss": 0.196868896484375, + "step": 12962 + }, + { + "epoch": 0.8762336082195484, + "grad_norm": 0.9629853367805481, + "learning_rate": 1.2015688060332054e-06, + "loss": 0.176849365234375, + "step": 12963 + }, + { + "epoch": 0.8763012031904827, + "grad_norm": 0.6674705147743225, + "learning_rate": 1.2002751836537367e-06, + "loss": 0.16632080078125, + "step": 12964 + }, + { + "epoch": 0.8763687981614168, + "grad_norm": 0.6708807945251465, + "learning_rate": 1.198982228990777e-06, + "loss": 0.13321685791015625, + "step": 12965 + }, + { + "epoch": 0.876436393132351, + "grad_norm": 0.6253324151039124, + "learning_rate": 1.1976899421068843e-06, + "loss": 0.12496185302734375, + "step": 12966 + }, + { + "epoch": 0.8765039881032851, + "grad_norm": 0.7136288285255432, + "learning_rate": 1.1963983230645886e-06, + "loss": 0.1303253173828125, + "step": 12967 + }, + { + "epoch": 0.8765715830742192, + "grad_norm": 0.3192113935947418, + "learning_rate": 1.195107371926385e-06, + "loss": 0.05765533447265625, + "step": 12968 + }, + { + "epoch": 0.8766391780451535, + "grad_norm": 0.6861570477485657, + "learning_rate": 1.19381708875474e-06, + "loss": 0.12365531921386719, + "step": 12969 + }, + { + "epoch": 0.8767067730160876, + "grad_norm": 1.440659523010254, + "learning_rate": 1.1925274736120807e-06, + "loss": 0.214111328125, + "step": 12970 + }, + { + "epoch": 0.8767743679870218, + "grad_norm": 0.23090817034244537, + "learning_rate": 1.1912385265608134e-06, + "loss": 0.05072021484375, + "step": 12971 + }, + { + "epoch": 0.8768419629579559, + "grad_norm": 0.24709445238113403, + "learning_rate": 1.1899502476633016e-06, + "loss": 0.023416519165039062, + "step": 12972 + }, + { + "epoch": 0.8769095579288901, + "grad_norm": 0.48118606209754944, + "learning_rate": 1.1886626369818793e-06, + "loss": 0.07987213134765625, + "step": 12973 + }, + { + "epoch": 0.8769771528998243, + "grad_norm": 0.47777825593948364, + "learning_rate": 1.187375694578851e-06, + "loss": 0.06966400146484375, + "step": 12974 + }, + { + "epoch": 0.8770447478707584, + "grad_norm": 0.535594642162323, + "learning_rate": 1.186089420516484e-06, + "loss": 0.0816802978515625, + "step": 12975 + }, + { + "epoch": 0.8771123428416926, + "grad_norm": 0.8960360288619995, + "learning_rate": 1.1848038148570234e-06, + "loss": 0.10682296752929688, + "step": 12976 + }, + { + "epoch": 0.8771799378126267, + "grad_norm": 0.8199092745780945, + "learning_rate": 1.1835188776626643e-06, + "loss": 0.08449935913085938, + "step": 12977 + }, + { + "epoch": 0.8772475327835609, + "grad_norm": 1.4332268238067627, + "learning_rate": 1.1822346089955888e-06, + "loss": 0.1455535888671875, + "step": 12978 + }, + { + "epoch": 0.877315127754495, + "grad_norm": 0.470200777053833, + "learning_rate": 1.1809510089179338e-06, + "loss": 0.06400299072265625, + "step": 12979 + }, + { + "epoch": 0.8773827227254293, + "grad_norm": 0.3225289285182953, + "learning_rate": 1.1796680774918094e-06, + "loss": 0.0538330078125, + "step": 12980 + }, + { + "epoch": 0.8774503176963634, + "grad_norm": 0.8588278889656067, + "learning_rate": 1.1783858147792915e-06, + "loss": 0.1234588623046875, + "step": 12981 + }, + { + "epoch": 0.8775179126672975, + "grad_norm": 0.25521159172058105, + "learning_rate": 1.1771042208424232e-06, + "loss": 0.03218269348144531, + "step": 12982 + }, + { + "epoch": 0.8775855076382317, + "grad_norm": 0.8759100437164307, + "learning_rate": 1.1758232957432152e-06, + "loss": 0.1688232421875, + "step": 12983 + }, + { + "epoch": 0.8776531026091658, + "grad_norm": 0.19969752430915833, + "learning_rate": 1.1745430395436513e-06, + "loss": 0.032318115234375, + "step": 12984 + }, + { + "epoch": 0.8777206975801001, + "grad_norm": 2.0223915576934814, + "learning_rate": 1.1732634523056718e-06, + "loss": 0.22098541259765625, + "step": 12985 + }, + { + "epoch": 0.8777882925510342, + "grad_norm": 0.591492235660553, + "learning_rate": 1.1719845340911922e-06, + "loss": 0.07598876953125, + "step": 12986 + }, + { + "epoch": 0.8778558875219684, + "grad_norm": 0.9007282853126526, + "learning_rate": 1.1707062849621013e-06, + "loss": 0.13795852661132812, + "step": 12987 + }, + { + "epoch": 0.8779234824929025, + "grad_norm": 0.433276891708374, + "learning_rate": 1.1694287049802398e-06, + "loss": 0.0743560791015625, + "step": 12988 + }, + { + "epoch": 0.8779910774638366, + "grad_norm": 0.563689649105072, + "learning_rate": 1.1681517942074332e-06, + "loss": 0.10766792297363281, + "step": 12989 + }, + { + "epoch": 0.8780586724347709, + "grad_norm": 0.2257329821586609, + "learning_rate": 1.166875552705457e-06, + "loss": 0.034515380859375, + "step": 12990 + }, + { + "epoch": 0.878126267405705, + "grad_norm": 0.947938859462738, + "learning_rate": 1.1655999805360717e-06, + "loss": 0.1128082275390625, + "step": 12991 + }, + { + "epoch": 0.8781938623766392, + "grad_norm": 0.8175796866416931, + "learning_rate": 1.164325077760993e-06, + "loss": 0.1279754638671875, + "step": 12992 + }, + { + "epoch": 0.8782614573475733, + "grad_norm": 0.4877479672431946, + "learning_rate": 1.1630508444419118e-06, + "loss": 0.079437255859375, + "step": 12993 + }, + { + "epoch": 0.8783290523185076, + "grad_norm": 0.697372317314148, + "learning_rate": 1.1617772806404818e-06, + "loss": 0.1143646240234375, + "step": 12994 + }, + { + "epoch": 0.8783966472894417, + "grad_norm": 0.3634900152683258, + "learning_rate": 1.1605043864183256e-06, + "loss": 0.056243896484375, + "step": 12995 + }, + { + "epoch": 0.8784642422603758, + "grad_norm": 0.5980051159858704, + "learning_rate": 1.1592321618370339e-06, + "loss": 0.10298919677734375, + "step": 12996 + }, + { + "epoch": 0.87853183723131, + "grad_norm": 0.46886613965034485, + "learning_rate": 1.1579606069581656e-06, + "loss": 0.097015380859375, + "step": 12997 + }, + { + "epoch": 0.8785994322022441, + "grad_norm": 0.40105631947517395, + "learning_rate": 1.1566897218432448e-06, + "loss": 0.08271026611328125, + "step": 12998 + }, + { + "epoch": 0.8786670271731783, + "grad_norm": 1.4153438806533813, + "learning_rate": 1.1554195065537643e-06, + "loss": 0.240966796875, + "step": 12999 + }, + { + "epoch": 0.8787346221441125, + "grad_norm": 0.9573836922645569, + "learning_rate": 1.1541499611511913e-06, + "loss": 0.18133544921875, + "step": 13000 + }, + { + "epoch": 0.8788022171150467, + "grad_norm": 0.41713517904281616, + "learning_rate": 1.1528810856969452e-06, + "loss": 0.05994415283203125, + "step": 13001 + }, + { + "epoch": 0.8788698120859808, + "grad_norm": 0.35403168201446533, + "learning_rate": 1.15161288025243e-06, + "loss": 0.056171417236328125, + "step": 13002 + }, + { + "epoch": 0.8789374070569149, + "grad_norm": 0.3242809772491455, + "learning_rate": 1.1503453448790018e-06, + "loss": 0.06369781494140625, + "step": 13003 + }, + { + "epoch": 0.8790050020278491, + "grad_norm": 0.4940910041332245, + "learning_rate": 1.1490784796379982e-06, + "loss": 0.0950164794921875, + "step": 13004 + }, + { + "epoch": 0.8790725969987833, + "grad_norm": 1.151347279548645, + "learning_rate": 1.1478122845907152e-06, + "loss": 0.160797119140625, + "step": 13005 + }, + { + "epoch": 0.8791401919697175, + "grad_norm": 0.8450053930282593, + "learning_rate": 1.1465467597984203e-06, + "loss": 0.120330810546875, + "step": 13006 + }, + { + "epoch": 0.8792077869406516, + "grad_norm": 1.6239862442016602, + "learning_rate": 1.1452819053223479e-06, + "loss": 0.204925537109375, + "step": 13007 + }, + { + "epoch": 0.8792753819115858, + "grad_norm": 0.5667906403541565, + "learning_rate": 1.1440177212236974e-06, + "loss": 0.09698486328125, + "step": 13008 + }, + { + "epoch": 0.8793429768825199, + "grad_norm": 0.25738999247550964, + "learning_rate": 1.1427542075636383e-06, + "loss": 0.03502655029296875, + "step": 13009 + }, + { + "epoch": 0.8794105718534541, + "grad_norm": 0.9000124931335449, + "learning_rate": 1.1414913644033099e-06, + "loss": 0.1142435073852539, + "step": 13010 + }, + { + "epoch": 0.8794781668243883, + "grad_norm": 0.36668848991394043, + "learning_rate": 1.1402291918038133e-06, + "loss": 0.06528854370117188, + "step": 13011 + }, + { + "epoch": 0.8795457617953224, + "grad_norm": 1.1018621921539307, + "learning_rate": 1.138967689826218e-06, + "loss": 0.204833984375, + "step": 13012 + }, + { + "epoch": 0.8796133567662566, + "grad_norm": 0.693187415599823, + "learning_rate": 1.137706858531572e-06, + "loss": 0.1207122802734375, + "step": 13013 + }, + { + "epoch": 0.8796809517371907, + "grad_norm": 0.3355056941509247, + "learning_rate": 1.136446697980873e-06, + "loss": 0.06209564208984375, + "step": 13014 + }, + { + "epoch": 0.879748546708125, + "grad_norm": 0.4540368318557739, + "learning_rate": 1.1351872082351005e-06, + "loss": 0.09563446044921875, + "step": 13015 + }, + { + "epoch": 0.8798161416790591, + "grad_norm": 0.9967982769012451, + "learning_rate": 1.133928389355196e-06, + "loss": 0.1030120849609375, + "step": 13016 + }, + { + "epoch": 0.8798837366499932, + "grad_norm": 0.7101942300796509, + "learning_rate": 1.132670241402069e-06, + "loss": 0.1241607666015625, + "step": 13017 + }, + { + "epoch": 0.8799513316209274, + "grad_norm": 0.7471499443054199, + "learning_rate": 1.131412764436594e-06, + "loss": 0.1385345458984375, + "step": 13018 + }, + { + "epoch": 0.8800189265918615, + "grad_norm": 1.3705319166183472, + "learning_rate": 1.1301559585196176e-06, + "loss": 0.2064971923828125, + "step": 13019 + }, + { + "epoch": 0.8800865215627958, + "grad_norm": 0.7936185002326965, + "learning_rate": 1.1288998237119524e-06, + "loss": 0.129241943359375, + "step": 13020 + }, + { + "epoch": 0.8801541165337299, + "grad_norm": 0.30838456749916077, + "learning_rate": 1.127644360074377e-06, + "loss": 0.0553741455078125, + "step": 13021 + }, + { + "epoch": 0.8802217115046641, + "grad_norm": 0.6876766681671143, + "learning_rate": 1.1263895676676389e-06, + "loss": 0.1183319091796875, + "step": 13022 + }, + { + "epoch": 0.8802893064755982, + "grad_norm": 0.6617056131362915, + "learning_rate": 1.1251354465524517e-06, + "loss": 0.1404266357421875, + "step": 13023 + }, + { + "epoch": 0.8803569014465323, + "grad_norm": 0.7181702852249146, + "learning_rate": 1.123881996789498e-06, + "loss": 0.119171142578125, + "step": 13024 + }, + { + "epoch": 0.8804244964174666, + "grad_norm": 0.5866822004318237, + "learning_rate": 1.1226292184394265e-06, + "loss": 0.0986480712890625, + "step": 13025 + }, + { + "epoch": 0.8804920913884007, + "grad_norm": 0.3977879583835602, + "learning_rate": 1.121377111562862e-06, + "loss": 0.0784454345703125, + "step": 13026 + }, + { + "epoch": 0.8805596863593349, + "grad_norm": 0.7740302085876465, + "learning_rate": 1.1201256762203754e-06, + "loss": 0.097412109375, + "step": 13027 + }, + { + "epoch": 0.880627281330269, + "grad_norm": 0.6384658813476562, + "learning_rate": 1.1188749124725305e-06, + "loss": 0.14951705932617188, + "step": 13028 + }, + { + "epoch": 0.8806948763012032, + "grad_norm": 0.7332258224487305, + "learning_rate": 1.117624820379844e-06, + "loss": 0.13974761962890625, + "step": 13029 + }, + { + "epoch": 0.8807624712721374, + "grad_norm": 0.4915197789669037, + "learning_rate": 1.1163754000028004e-06, + "loss": 0.0784912109375, + "step": 13030 + }, + { + "epoch": 0.8808300662430715, + "grad_norm": 0.7401928901672363, + "learning_rate": 1.1151266514018582e-06, + "loss": 0.09706497192382812, + "step": 13031 + }, + { + "epoch": 0.8808976612140057, + "grad_norm": 0.7546069622039795, + "learning_rate": 1.1138785746374357e-06, + "loss": 0.09629058837890625, + "step": 13032 + }, + { + "epoch": 0.8809652561849398, + "grad_norm": 1.1154425144195557, + "learning_rate": 1.1126311697699265e-06, + "loss": 0.15761566162109375, + "step": 13033 + }, + { + "epoch": 0.881032851155874, + "grad_norm": 0.2728986144065857, + "learning_rate": 1.1113844368596854e-06, + "loss": 0.036991119384765625, + "step": 13034 + }, + { + "epoch": 0.8811004461268082, + "grad_norm": 0.2991786003112793, + "learning_rate": 1.1101383759670376e-06, + "loss": 0.05307769775390625, + "step": 13035 + }, + { + "epoch": 0.8811680410977424, + "grad_norm": 1.0212833881378174, + "learning_rate": 1.1088929871522747e-06, + "loss": 0.18695068359375, + "step": 13036 + }, + { + "epoch": 0.8812356360686765, + "grad_norm": 0.9117931127548218, + "learning_rate": 1.1076482704756568e-06, + "loss": 0.158355712890625, + "step": 13037 + }, + { + "epoch": 0.8813032310396106, + "grad_norm": 0.6152516007423401, + "learning_rate": 1.1064042259974093e-06, + "loss": 0.126800537109375, + "step": 13038 + }, + { + "epoch": 0.8813708260105448, + "grad_norm": 1.1386009454727173, + "learning_rate": 1.1051608537777342e-06, + "loss": 0.218841552734375, + "step": 13039 + }, + { + "epoch": 0.881438420981479, + "grad_norm": 0.24793028831481934, + "learning_rate": 1.1039181538767812e-06, + "loss": 0.03696441650390625, + "step": 13040 + }, + { + "epoch": 0.8815060159524132, + "grad_norm": 1.3764464855194092, + "learning_rate": 1.1026761263546909e-06, + "loss": 0.20269775390625, + "step": 13041 + }, + { + "epoch": 0.8815736109233473, + "grad_norm": 1.5531432628631592, + "learning_rate": 1.1014347712715583e-06, + "loss": 0.225982666015625, + "step": 13042 + }, + { + "epoch": 0.8816412058942814, + "grad_norm": 0.7640218734741211, + "learning_rate": 1.1001940886874407e-06, + "loss": 0.09793853759765625, + "step": 13043 + }, + { + "epoch": 0.8817088008652156, + "grad_norm": 0.38328656554222107, + "learning_rate": 1.0989540786623764e-06, + "loss": 0.06488800048828125, + "step": 13044 + }, + { + "epoch": 0.8817763958361498, + "grad_norm": 0.9102234840393066, + "learning_rate": 1.097714741256366e-06, + "loss": 0.1683502197265625, + "step": 13045 + }, + { + "epoch": 0.881843990807084, + "grad_norm": 0.3053920567035675, + "learning_rate": 1.0964760765293713e-06, + "loss": 0.043792724609375, + "step": 13046 + }, + { + "epoch": 0.8819115857780181, + "grad_norm": 0.45933958888053894, + "learning_rate": 1.0952380845413296e-06, + "loss": 0.06906509399414062, + "step": 13047 + }, + { + "epoch": 0.8819791807489523, + "grad_norm": 0.9709993004798889, + "learning_rate": 1.0940007653521427e-06, + "loss": 0.1398773193359375, + "step": 13048 + }, + { + "epoch": 0.8820467757198864, + "grad_norm": 0.2663101553916931, + "learning_rate": 1.0927641190216797e-06, + "loss": 0.032073974609375, + "step": 13049 + }, + { + "epoch": 0.8821143706908205, + "grad_norm": 0.5040925741195679, + "learning_rate": 1.0915281456097758e-06, + "loss": 0.13568115234375, + "step": 13050 + }, + { + "epoch": 0.8821819656617548, + "grad_norm": 1.2204258441925049, + "learning_rate": 1.090292845176235e-06, + "loss": 0.1719207763671875, + "step": 13051 + }, + { + "epoch": 0.8822495606326889, + "grad_norm": 0.8823261260986328, + "learning_rate": 1.089058217780833e-06, + "loss": 0.11322021484375, + "step": 13052 + }, + { + "epoch": 0.8823171556036231, + "grad_norm": 0.6508342623710632, + "learning_rate": 1.087824263483303e-06, + "loss": 0.155792236328125, + "step": 13053 + }, + { + "epoch": 0.8823847505745572, + "grad_norm": 0.7479077577590942, + "learning_rate": 1.0865909823433546e-06, + "loss": 0.0732431411743164, + "step": 13054 + }, + { + "epoch": 0.8824523455454915, + "grad_norm": 0.8868075609207153, + "learning_rate": 1.085358374420663e-06, + "loss": 0.1485748291015625, + "step": 13055 + }, + { + "epoch": 0.8825199405164256, + "grad_norm": 0.748778223991394, + "learning_rate": 1.084126439774864e-06, + "loss": 0.1304473876953125, + "step": 13056 + }, + { + "epoch": 0.8825875354873597, + "grad_norm": 0.552716851234436, + "learning_rate": 1.0828951784655716e-06, + "loss": 0.104400634765625, + "step": 13057 + }, + { + "epoch": 0.8826551304582939, + "grad_norm": 0.730901837348938, + "learning_rate": 1.0816645905523598e-06, + "loss": 0.129119873046875, + "step": 13058 + }, + { + "epoch": 0.882722725429228, + "grad_norm": 0.6144089698791504, + "learning_rate": 1.0804346760947742e-06, + "loss": 0.1047515869140625, + "step": 13059 + }, + { + "epoch": 0.8827903204001623, + "grad_norm": 0.28081074357032776, + "learning_rate": 1.0792054351523223e-06, + "loss": 0.03727912902832031, + "step": 13060 + }, + { + "epoch": 0.8828579153710964, + "grad_norm": 1.162115216255188, + "learning_rate": 1.077976867784483e-06, + "loss": 0.179840087890625, + "step": 13061 + }, + { + "epoch": 0.8829255103420306, + "grad_norm": 0.3465488851070404, + "learning_rate": 1.0767489740507008e-06, + "loss": 0.04917144775390625, + "step": 13062 + }, + { + "epoch": 0.8829931053129647, + "grad_norm": 0.26516827940940857, + "learning_rate": 1.0755217540103978e-06, + "loss": 0.0613250732421875, + "step": 13063 + }, + { + "epoch": 0.8830607002838988, + "grad_norm": 0.4890276789665222, + "learning_rate": 1.0742952077229417e-06, + "loss": 0.08421707153320312, + "step": 13064 + }, + { + "epoch": 0.883128295254833, + "grad_norm": 0.7201829552650452, + "learning_rate": 1.07306933524769e-06, + "loss": 0.105743408203125, + "step": 13065 + }, + { + "epoch": 0.8831958902257672, + "grad_norm": 0.40453028678894043, + "learning_rate": 1.0718441366439535e-06, + "loss": 0.08153533935546875, + "step": 13066 + }, + { + "epoch": 0.8832634851967014, + "grad_norm": 0.9322569966316223, + "learning_rate": 1.0706196119710132e-06, + "loss": 0.160919189453125, + "step": 13067 + }, + { + "epoch": 0.8833310801676355, + "grad_norm": 0.7583255767822266, + "learning_rate": 1.0693957612881283e-06, + "loss": 0.1549072265625, + "step": 13068 + }, + { + "epoch": 0.8833986751385697, + "grad_norm": 1.0163516998291016, + "learning_rate": 1.0681725846545033e-06, + "loss": 0.08504104614257812, + "step": 13069 + }, + { + "epoch": 0.8834662701095038, + "grad_norm": 1.0192539691925049, + "learning_rate": 1.066950082129332e-06, + "loss": 0.18994140625, + "step": 13070 + }, + { + "epoch": 0.883533865080438, + "grad_norm": 0.25340625643730164, + "learning_rate": 1.0657282537717661e-06, + "loss": 0.023681640625, + "step": 13071 + }, + { + "epoch": 0.8836014600513722, + "grad_norm": 1.0361088514328003, + "learning_rate": 1.0645070996409229e-06, + "loss": 0.172454833984375, + "step": 13072 + }, + { + "epoch": 0.8836690550223063, + "grad_norm": 0.3015957772731781, + "learning_rate": 1.0632866197958886e-06, + "loss": 0.05328369140625, + "step": 13073 + }, + { + "epoch": 0.8837366499932405, + "grad_norm": 1.3547242879867554, + "learning_rate": 1.0620668142957207e-06, + "loss": 0.12987518310546875, + "step": 13074 + }, + { + "epoch": 0.8838042449641746, + "grad_norm": 0.24400530755519867, + "learning_rate": 1.0608476831994374e-06, + "loss": 0.03481292724609375, + "step": 13075 + }, + { + "epoch": 0.8838718399351089, + "grad_norm": 0.862579882144928, + "learning_rate": 1.0596292265660329e-06, + "loss": 0.10246658325195312, + "step": 13076 + }, + { + "epoch": 0.883939434906043, + "grad_norm": 0.3549771308898926, + "learning_rate": 1.0584114444544602e-06, + "loss": 0.06649017333984375, + "step": 13077 + }, + { + "epoch": 0.8840070298769771, + "grad_norm": 0.973667562007904, + "learning_rate": 1.0571943369236403e-06, + "loss": 0.08919525146484375, + "step": 13078 + }, + { + "epoch": 0.8840746248479113, + "grad_norm": 1.4031457901000977, + "learning_rate": 1.0559779040324712e-06, + "loss": 0.260833740234375, + "step": 13079 + }, + { + "epoch": 0.8841422198188454, + "grad_norm": 0.5964077115058899, + "learning_rate": 1.0547621458398061e-06, + "loss": 0.1175994873046875, + "step": 13080 + }, + { + "epoch": 0.8842098147897797, + "grad_norm": 0.3558505177497864, + "learning_rate": 1.0535470624044758e-06, + "loss": 0.06497955322265625, + "step": 13081 + }, + { + "epoch": 0.8842774097607138, + "grad_norm": 0.2880922853946686, + "learning_rate": 1.052332653785265e-06, + "loss": 0.036190032958984375, + "step": 13082 + }, + { + "epoch": 0.884345004731648, + "grad_norm": 0.24600611627101898, + "learning_rate": 1.0511189200409439e-06, + "loss": 0.0346221923828125, + "step": 13083 + }, + { + "epoch": 0.8844125997025821, + "grad_norm": 0.5357607007026672, + "learning_rate": 1.0499058612302366e-06, + "loss": 0.0932159423828125, + "step": 13084 + }, + { + "epoch": 0.8844801946735162, + "grad_norm": 0.6156560778617859, + "learning_rate": 1.0486934774118378e-06, + "loss": 0.09135055541992188, + "step": 13085 + }, + { + "epoch": 0.8845477896444505, + "grad_norm": 0.3690354526042938, + "learning_rate": 1.047481768644411e-06, + "loss": 0.04828453063964844, + "step": 13086 + }, + { + "epoch": 0.8846153846153846, + "grad_norm": 0.3964006304740906, + "learning_rate": 1.0462707349865875e-06, + "loss": 0.08007049560546875, + "step": 13087 + }, + { + "epoch": 0.8846829795863188, + "grad_norm": 0.44779717922210693, + "learning_rate": 1.0450603764969585e-06, + "loss": 0.06656455993652344, + "step": 13088 + }, + { + "epoch": 0.8847505745572529, + "grad_norm": 0.49424272775650024, + "learning_rate": 1.0438506932340992e-06, + "loss": 0.099578857421875, + "step": 13089 + }, + { + "epoch": 0.8848181695281871, + "grad_norm": 0.513419508934021, + "learning_rate": 1.042641685256534e-06, + "loss": 0.07288551330566406, + "step": 13090 + }, + { + "epoch": 0.8848857644991213, + "grad_norm": 0.4808555245399475, + "learning_rate": 1.0414333526227598e-06, + "loss": 0.0867156982421875, + "step": 13091 + }, + { + "epoch": 0.8849533594700554, + "grad_norm": 0.4989417493343353, + "learning_rate": 1.0402256953912531e-06, + "loss": 0.1070709228515625, + "step": 13092 + }, + { + "epoch": 0.8850209544409896, + "grad_norm": 0.5156048536300659, + "learning_rate": 1.0390187136204389e-06, + "loss": 0.11013031005859375, + "step": 13093 + }, + { + "epoch": 0.8850885494119237, + "grad_norm": 0.22254735231399536, + "learning_rate": 1.0378124073687251e-06, + "loss": 0.0330657958984375, + "step": 13094 + }, + { + "epoch": 0.885156144382858, + "grad_norm": 0.91324782371521, + "learning_rate": 1.0366067766944721e-06, + "loss": 0.1879425048828125, + "step": 13095 + }, + { + "epoch": 0.8852237393537921, + "grad_norm": 0.9055196046829224, + "learning_rate": 1.035401821656023e-06, + "loss": 0.1409912109375, + "step": 13096 + }, + { + "epoch": 0.8852913343247263, + "grad_norm": 1.153091549873352, + "learning_rate": 1.0341975423116778e-06, + "loss": 0.11762809753417969, + "step": 13097 + }, + { + "epoch": 0.8853589292956604, + "grad_norm": 0.46877744793891907, + "learning_rate": 1.0329939387197086e-06, + "loss": 0.0760040283203125, + "step": 13098 + }, + { + "epoch": 0.8854265242665945, + "grad_norm": 0.32396870851516724, + "learning_rate": 1.0317910109383533e-06, + "loss": 0.04428863525390625, + "step": 13099 + }, + { + "epoch": 0.8854941192375287, + "grad_norm": 0.34330156445503235, + "learning_rate": 1.0305887590258156e-06, + "loss": 0.06856536865234375, + "step": 13100 + }, + { + "epoch": 0.8855617142084629, + "grad_norm": 0.4894864559173584, + "learning_rate": 1.029387183040269e-06, + "loss": 0.09836578369140625, + "step": 13101 + }, + { + "epoch": 0.8856293091793971, + "grad_norm": 0.5422512888908386, + "learning_rate": 1.028186283039852e-06, + "loss": 0.07895660400390625, + "step": 13102 + }, + { + "epoch": 0.8856969041503312, + "grad_norm": 0.20946064591407776, + "learning_rate": 1.026986059082673e-06, + "loss": 0.037044525146484375, + "step": 13103 + }, + { + "epoch": 0.8857644991212654, + "grad_norm": 0.6034077405929565, + "learning_rate": 1.025786511226804e-06, + "loss": 0.12384796142578125, + "step": 13104 + }, + { + "epoch": 0.8858320940921995, + "grad_norm": 0.37705639004707336, + "learning_rate": 1.0245876395302916e-06, + "loss": 0.05681610107421875, + "step": 13105 + }, + { + "epoch": 0.8858996890631337, + "grad_norm": 1.0955277681350708, + "learning_rate": 1.0233894440511365e-06, + "loss": 0.1927032470703125, + "step": 13106 + }, + { + "epoch": 0.8859672840340679, + "grad_norm": 0.3561304211616516, + "learning_rate": 1.0221919248473238e-06, + "loss": 0.04819488525390625, + "step": 13107 + }, + { + "epoch": 0.886034879005002, + "grad_norm": 0.9010157585144043, + "learning_rate": 1.0209950819767921e-06, + "loss": 0.1521759033203125, + "step": 13108 + }, + { + "epoch": 0.8861024739759362, + "grad_norm": 0.4659939110279083, + "learning_rate": 1.0197989154974518e-06, + "loss": 0.06983184814453125, + "step": 13109 + }, + { + "epoch": 0.8861700689468703, + "grad_norm": 1.1240538358688354, + "learning_rate": 1.0186034254671833e-06, + "loss": 0.13181686401367188, + "step": 13110 + }, + { + "epoch": 0.8862376639178046, + "grad_norm": 0.6793886423110962, + "learning_rate": 1.0174086119438304e-06, + "loss": 0.09453582763671875, + "step": 13111 + }, + { + "epoch": 0.8863052588887387, + "grad_norm": 0.27245575189590454, + "learning_rate": 1.0162144749852049e-06, + "loss": 0.04087066650390625, + "step": 13112 + }, + { + "epoch": 0.8863728538596728, + "grad_norm": 0.9395737648010254, + "learning_rate": 1.0150210146490873e-06, + "loss": 0.15570068359375, + "step": 13113 + }, + { + "epoch": 0.886440448830607, + "grad_norm": 0.8490308523178101, + "learning_rate": 1.0138282309932252e-06, + "loss": 0.19024658203125, + "step": 13114 + }, + { + "epoch": 0.8865080438015411, + "grad_norm": 0.389547735452652, + "learning_rate": 1.0126361240753317e-06, + "loss": 0.05521392822265625, + "step": 13115 + }, + { + "epoch": 0.8865756387724754, + "grad_norm": 1.010797142982483, + "learning_rate": 1.0114446939530897e-06, + "loss": 0.13276290893554688, + "step": 13116 + }, + { + "epoch": 0.8866432337434095, + "grad_norm": 0.7168452143669128, + "learning_rate": 1.0102539406841444e-06, + "loss": 0.12134552001953125, + "step": 13117 + }, + { + "epoch": 0.8867108287143437, + "grad_norm": 0.7695521712303162, + "learning_rate": 1.0090638643261195e-06, + "loss": 0.1296539306640625, + "step": 13118 + }, + { + "epoch": 0.8867784236852778, + "grad_norm": 0.7142612338066101, + "learning_rate": 1.0078744649365879e-06, + "loss": 0.11651611328125, + "step": 13119 + }, + { + "epoch": 0.8868460186562119, + "grad_norm": 0.2883061468601227, + "learning_rate": 1.0066857425731097e-06, + "loss": 0.0395660400390625, + "step": 13120 + }, + { + "epoch": 0.8869136136271462, + "grad_norm": 0.612398624420166, + "learning_rate": 1.005497697293199e-06, + "loss": 0.08953857421875, + "step": 13121 + }, + { + "epoch": 0.8869812085980803, + "grad_norm": 0.9631105661392212, + "learning_rate": 1.00431032915434e-06, + "loss": 0.129119873046875, + "step": 13122 + }, + { + "epoch": 0.8870488035690145, + "grad_norm": 0.22964124381542206, + "learning_rate": 1.0031236382139846e-06, + "loss": 0.02545928955078125, + "step": 13123 + }, + { + "epoch": 0.8871163985399486, + "grad_norm": 1.2472968101501465, + "learning_rate": 1.0019376245295542e-06, + "loss": 0.2059478759765625, + "step": 13124 + }, + { + "epoch": 0.8871839935108828, + "grad_norm": 1.358694076538086, + "learning_rate": 1.000752288158434e-06, + "loss": 0.1616973876953125, + "step": 13125 + }, + { + "epoch": 0.887251588481817, + "grad_norm": 1.1063141822814941, + "learning_rate": 9.995676291579786e-07, + "loss": 0.16278076171875, + "step": 13126 + }, + { + "epoch": 0.8873191834527511, + "grad_norm": 0.9977242350578308, + "learning_rate": 9.983836475855102e-07, + "loss": 0.15460205078125, + "step": 13127 + }, + { + "epoch": 0.8873867784236853, + "grad_norm": 1.1084134578704834, + "learning_rate": 9.972003434983146e-07, + "loss": 0.121124267578125, + "step": 13128 + }, + { + "epoch": 0.8874543733946194, + "grad_norm": 0.48747241497039795, + "learning_rate": 9.960177169536494e-07, + "loss": 0.08858489990234375, + "step": 13129 + }, + { + "epoch": 0.8875219683655536, + "grad_norm": 0.581576943397522, + "learning_rate": 9.948357680087356e-07, + "loss": 0.0840301513671875, + "step": 13130 + }, + { + "epoch": 0.8875895633364878, + "grad_norm": 0.49246302247047424, + "learning_rate": 9.936544967207672e-07, + "loss": 0.09917449951171875, + "step": 13131 + }, + { + "epoch": 0.887657158307422, + "grad_norm": 0.3850966989994049, + "learning_rate": 9.924739031468955e-07, + "loss": 0.06982040405273438, + "step": 13132 + }, + { + "epoch": 0.8877247532783561, + "grad_norm": 0.697469174861908, + "learning_rate": 9.912939873442495e-07, + "loss": 0.13934326171875, + "step": 13133 + }, + { + "epoch": 0.8877923482492902, + "grad_norm": 0.29042693972587585, + "learning_rate": 9.901147493699203e-07, + "loss": 0.046535491943359375, + "step": 13134 + }, + { + "epoch": 0.8878599432202244, + "grad_norm": 1.0471231937408447, + "learning_rate": 9.889361892809672e-07, + "loss": 0.165069580078125, + "step": 13135 + }, + { + "epoch": 0.8879275381911585, + "grad_norm": 0.9358488917350769, + "learning_rate": 9.877583071344133e-07, + "loss": 0.197021484375, + "step": 13136 + }, + { + "epoch": 0.8879951331620928, + "grad_norm": 1.1437528133392334, + "learning_rate": 9.86581102987254e-07, + "loss": 0.12578868865966797, + "step": 13137 + }, + { + "epoch": 0.8880627281330269, + "grad_norm": 0.26017212867736816, + "learning_rate": 9.854045768964493e-07, + "loss": 0.03678131103515625, + "step": 13138 + }, + { + "epoch": 0.8881303231039611, + "grad_norm": 0.5767015814781189, + "learning_rate": 9.842287289189283e-07, + "loss": 0.08704566955566406, + "step": 13139 + }, + { + "epoch": 0.8881979180748952, + "grad_norm": 1.2062088251113892, + "learning_rate": 9.83053559111584e-07, + "loss": 0.1514129638671875, + "step": 13140 + }, + { + "epoch": 0.8882655130458293, + "grad_norm": 1.103139042854309, + "learning_rate": 9.818790675312778e-07, + "loss": 0.1500701904296875, + "step": 13141 + }, + { + "epoch": 0.8883331080167636, + "grad_norm": 1.1503804922103882, + "learning_rate": 9.807052542348422e-07, + "loss": 0.18975830078125, + "step": 13142 + }, + { + "epoch": 0.8884007029876977, + "grad_norm": 0.5801032781600952, + "learning_rate": 9.79532119279067e-07, + "loss": 0.12225341796875, + "step": 13143 + }, + { + "epoch": 0.8884682979586319, + "grad_norm": 0.48441120982170105, + "learning_rate": 9.783596627207264e-07, + "loss": 0.0869607925415039, + "step": 13144 + }, + { + "epoch": 0.888535892929566, + "grad_norm": 0.20627541840076447, + "learning_rate": 9.771878846165388e-07, + "loss": 0.028387069702148438, + "step": 13145 + }, + { + "epoch": 0.8886034879005003, + "grad_norm": 0.22839395701885223, + "learning_rate": 9.760167850232122e-07, + "loss": 0.030609130859375, + "step": 13146 + }, + { + "epoch": 0.8886710828714344, + "grad_norm": 1.4831899404525757, + "learning_rate": 9.748463639974075e-07, + "loss": 0.135223388671875, + "step": 13147 + }, + { + "epoch": 0.8887386778423685, + "grad_norm": 0.27626875042915344, + "learning_rate": 9.736766215957565e-07, + "loss": 0.027439117431640625, + "step": 13148 + }, + { + "epoch": 0.8888062728133027, + "grad_norm": 0.2901090383529663, + "learning_rate": 9.725075578748588e-07, + "loss": 0.04471588134765625, + "step": 13149 + }, + { + "epoch": 0.8888738677842368, + "grad_norm": 0.29110702872276306, + "learning_rate": 9.713391728912823e-07, + "loss": 0.04117584228515625, + "step": 13150 + }, + { + "epoch": 0.888941462755171, + "grad_norm": 0.509220540523529, + "learning_rate": 9.701714667015604e-07, + "loss": 0.060184478759765625, + "step": 13151 + }, + { + "epoch": 0.8890090577261052, + "grad_norm": 0.29294517636299133, + "learning_rate": 9.690044393621928e-07, + "loss": 0.044803619384765625, + "step": 13152 + }, + { + "epoch": 0.8890766526970394, + "grad_norm": 0.35702386498451233, + "learning_rate": 9.678380909296491e-07, + "loss": 0.05841064453125, + "step": 13153 + }, + { + "epoch": 0.8891442476679735, + "grad_norm": 0.8247957825660706, + "learning_rate": 9.666724214603594e-07, + "loss": 0.138397216796875, + "step": 13154 + }, + { + "epoch": 0.8892118426389076, + "grad_norm": 0.31941285729408264, + "learning_rate": 9.65507431010737e-07, + "loss": 0.044055938720703125, + "step": 13155 + }, + { + "epoch": 0.8892794376098418, + "grad_norm": 0.761959969997406, + "learning_rate": 9.643431196371382e-07, + "loss": 0.12446975708007812, + "step": 13156 + }, + { + "epoch": 0.889347032580776, + "grad_norm": 0.6864991188049316, + "learning_rate": 9.631794873959131e-07, + "loss": 0.12836837768554688, + "step": 13157 + }, + { + "epoch": 0.8894146275517102, + "grad_norm": 0.8137268424034119, + "learning_rate": 9.620165343433517e-07, + "loss": 0.137725830078125, + "step": 13158 + }, + { + "epoch": 0.8894822225226443, + "grad_norm": 0.6489362716674805, + "learning_rate": 9.60854260535734e-07, + "loss": 0.10909271240234375, + "step": 13159 + }, + { + "epoch": 0.8895498174935785, + "grad_norm": 1.2945911884307861, + "learning_rate": 9.596926660293e-07, + "loss": 0.14884185791015625, + "step": 13160 + }, + { + "epoch": 0.8896174124645126, + "grad_norm": 1.5442783832550049, + "learning_rate": 9.585317508802445e-07, + "loss": 0.17765045166015625, + "step": 13161 + }, + { + "epoch": 0.8896850074354468, + "grad_norm": 0.508128821849823, + "learning_rate": 9.573715151447476e-07, + "loss": 0.0838470458984375, + "step": 13162 + }, + { + "epoch": 0.889752602406381, + "grad_norm": 0.3656211793422699, + "learning_rate": 9.562119588789481e-07, + "loss": 0.061309814453125, + "step": 13163 + }, + { + "epoch": 0.8898201973773151, + "grad_norm": 1.7149572372436523, + "learning_rate": 9.55053082138951e-07, + "loss": 0.12482452392578125, + "step": 13164 + }, + { + "epoch": 0.8898877923482493, + "grad_norm": 0.6170923709869385, + "learning_rate": 9.53894884980831e-07, + "loss": 0.086334228515625, + "step": 13165 + }, + { + "epoch": 0.8899553873191834, + "grad_norm": 0.38803166151046753, + "learning_rate": 9.527373674606271e-07, + "loss": 0.0559844970703125, + "step": 13166 + }, + { + "epoch": 0.8900229822901177, + "grad_norm": 0.7534748315811157, + "learning_rate": 9.515805296343461e-07, + "loss": 0.0989837646484375, + "step": 13167 + }, + { + "epoch": 0.8900905772610518, + "grad_norm": 0.8251864314079285, + "learning_rate": 9.504243715579696e-07, + "loss": 0.09984207153320312, + "step": 13168 + }, + { + "epoch": 0.8901581722319859, + "grad_norm": 0.9813548922538757, + "learning_rate": 9.492688932874316e-07, + "loss": 0.12628936767578125, + "step": 13169 + }, + { + "epoch": 0.8902257672029201, + "grad_norm": 0.5651686191558838, + "learning_rate": 9.481140948786487e-07, + "loss": 0.1112518310546875, + "step": 13170 + }, + { + "epoch": 0.8902933621738542, + "grad_norm": 2.724494457244873, + "learning_rate": 9.469599763874931e-07, + "loss": 0.16495513916015625, + "step": 13171 + }, + { + "epoch": 0.8903609571447885, + "grad_norm": 0.9522347450256348, + "learning_rate": 9.458065378698066e-07, + "loss": 0.1705322265625, + "step": 13172 + }, + { + "epoch": 0.8904285521157226, + "grad_norm": 1.1825588941574097, + "learning_rate": 9.446537793814047e-07, + "loss": 0.17669677734375, + "step": 13173 + }, + { + "epoch": 0.8904961470866567, + "grad_norm": 0.5528464317321777, + "learning_rate": 9.435017009780594e-07, + "loss": 0.12357330322265625, + "step": 13174 + }, + { + "epoch": 0.8905637420575909, + "grad_norm": 0.7534549236297607, + "learning_rate": 9.423503027155194e-07, + "loss": 0.1269073486328125, + "step": 13175 + }, + { + "epoch": 0.890631337028525, + "grad_norm": 0.44145482778549194, + "learning_rate": 9.411995846494953e-07, + "loss": 0.06560897827148438, + "step": 13176 + }, + { + "epoch": 0.8906989319994593, + "grad_norm": 0.2723003327846527, + "learning_rate": 9.400495468356657e-07, + "loss": 0.037113189697265625, + "step": 13177 + }, + { + "epoch": 0.8907665269703934, + "grad_norm": 1.1583892107009888, + "learning_rate": 9.389001893296778e-07, + "loss": 0.145782470703125, + "step": 13178 + }, + { + "epoch": 0.8908341219413276, + "grad_norm": 0.62851881980896, + "learning_rate": 9.377515121871438e-07, + "loss": 0.13739013671875, + "step": 13179 + }, + { + "epoch": 0.8909017169122617, + "grad_norm": 0.5645835399627686, + "learning_rate": 9.36603515463641e-07, + "loss": 0.11530303955078125, + "step": 13180 + }, + { + "epoch": 0.8909693118831958, + "grad_norm": 0.827685534954071, + "learning_rate": 9.354561992147232e-07, + "loss": 0.09432792663574219, + "step": 13181 + }, + { + "epoch": 0.8910369068541301, + "grad_norm": 0.8191909193992615, + "learning_rate": 9.343095634958976e-07, + "loss": 0.12668228149414062, + "step": 13182 + }, + { + "epoch": 0.8911045018250642, + "grad_norm": 0.32668083906173706, + "learning_rate": 9.331636083626516e-07, + "loss": 0.041805267333984375, + "step": 13183 + }, + { + "epoch": 0.8911720967959984, + "grad_norm": 0.3563324213027954, + "learning_rate": 9.320183338704325e-07, + "loss": 0.052581787109375, + "step": 13184 + }, + { + "epoch": 0.8912396917669325, + "grad_norm": 1.0653935670852661, + "learning_rate": 9.308737400746492e-07, + "loss": 0.1162261962890625, + "step": 13185 + }, + { + "epoch": 0.8913072867378667, + "grad_norm": 1.0576492547988892, + "learning_rate": 9.297298270306942e-07, + "loss": 0.164398193359375, + "step": 13186 + }, + { + "epoch": 0.8913748817088009, + "grad_norm": 0.3692640960216522, + "learning_rate": 9.285865947939082e-07, + "loss": 0.0852508544921875, + "step": 13187 + }, + { + "epoch": 0.891442476679735, + "grad_norm": 0.5315544009208679, + "learning_rate": 9.274440434196136e-07, + "loss": 0.0720977783203125, + "step": 13188 + }, + { + "epoch": 0.8915100716506692, + "grad_norm": 0.9145853519439697, + "learning_rate": 9.263021729630927e-07, + "loss": 0.17156982421875, + "step": 13189 + }, + { + "epoch": 0.8915776666216033, + "grad_norm": 1.8044131994247437, + "learning_rate": 9.251609834795966e-07, + "loss": 0.22418212890625, + "step": 13190 + }, + { + "epoch": 0.8916452615925375, + "grad_norm": 0.3043161630630493, + "learning_rate": 9.240204750243409e-07, + "loss": 0.05139923095703125, + "step": 13191 + }, + { + "epoch": 0.8917128565634717, + "grad_norm": 0.5076255202293396, + "learning_rate": 9.228806476525148e-07, + "loss": 0.0931084156036377, + "step": 13192 + }, + { + "epoch": 0.8917804515344059, + "grad_norm": 0.3429821729660034, + "learning_rate": 9.217415014192627e-07, + "loss": 0.0378875732421875, + "step": 13193 + }, + { + "epoch": 0.89184804650534, + "grad_norm": 1.419092059135437, + "learning_rate": 9.206030363797153e-07, + "loss": 0.185028076171875, + "step": 13194 + }, + { + "epoch": 0.8919156414762741, + "grad_norm": 0.4525078535079956, + "learning_rate": 9.194652525889486e-07, + "loss": 0.06021881103515625, + "step": 13195 + }, + { + "epoch": 0.8919832364472083, + "grad_norm": 0.7788112163543701, + "learning_rate": 9.183281501020169e-07, + "loss": 0.15008544921875, + "step": 13196 + }, + { + "epoch": 0.8920508314181425, + "grad_norm": 0.29269516468048096, + "learning_rate": 9.171917289739462e-07, + "loss": 0.0446929931640625, + "step": 13197 + }, + { + "epoch": 0.8921184263890767, + "grad_norm": 0.902813732624054, + "learning_rate": 9.160559892597142e-07, + "loss": 0.11943817138671875, + "step": 13198 + }, + { + "epoch": 0.8921860213600108, + "grad_norm": 0.3824375867843628, + "learning_rate": 9.149209310142853e-07, + "loss": 0.03621673583984375, + "step": 13199 + }, + { + "epoch": 0.892253616330945, + "grad_norm": 0.4617058038711548, + "learning_rate": 9.137865542925738e-07, + "loss": 0.08037567138671875, + "step": 13200 + }, + { + "epoch": 0.8923212113018791, + "grad_norm": 0.1767706423997879, + "learning_rate": 9.12652859149471e-07, + "loss": 0.020061492919921875, + "step": 13201 + }, + { + "epoch": 0.8923888062728133, + "grad_norm": 0.25608137249946594, + "learning_rate": 9.115198456398311e-07, + "loss": 0.04635810852050781, + "step": 13202 + }, + { + "epoch": 0.8924564012437475, + "grad_norm": 0.6110490560531616, + "learning_rate": 9.103875138184769e-07, + "loss": 0.100738525390625, + "step": 13203 + }, + { + "epoch": 0.8925239962146816, + "grad_norm": 0.4285658001899719, + "learning_rate": 9.092558637401966e-07, + "loss": 0.06676483154296875, + "step": 13204 + }, + { + "epoch": 0.8925915911856158, + "grad_norm": 1.4994145631790161, + "learning_rate": 9.081248954597476e-07, + "loss": 0.146942138671875, + "step": 13205 + }, + { + "epoch": 0.8926591861565499, + "grad_norm": 0.6503381729125977, + "learning_rate": 9.069946090318515e-07, + "loss": 0.11383056640625, + "step": 13206 + }, + { + "epoch": 0.8927267811274842, + "grad_norm": 1.0092302560806274, + "learning_rate": 9.058650045111994e-07, + "loss": 0.177398681640625, + "step": 13207 + }, + { + "epoch": 0.8927943760984183, + "grad_norm": 1.3373196125030518, + "learning_rate": 9.047360819524509e-07, + "loss": 0.1658477783203125, + "step": 13208 + }, + { + "epoch": 0.8928619710693524, + "grad_norm": 0.8679846525192261, + "learning_rate": 9.036078414102239e-07, + "loss": 0.141448974609375, + "step": 13209 + }, + { + "epoch": 0.8929295660402866, + "grad_norm": 1.130342721939087, + "learning_rate": 9.024802829391199e-07, + "loss": 0.1868133544921875, + "step": 13210 + }, + { + "epoch": 0.8929971610112207, + "grad_norm": 0.6509988903999329, + "learning_rate": 9.013534065936851e-07, + "loss": 0.1366729736328125, + "step": 13211 + }, + { + "epoch": 0.893064755982155, + "grad_norm": 1.4061026573181152, + "learning_rate": 9.002272124284561e-07, + "loss": 0.14896392822265625, + "step": 13212 + }, + { + "epoch": 0.8931323509530891, + "grad_norm": 0.8278476595878601, + "learning_rate": 8.991017004979174e-07, + "loss": 0.11597824096679688, + "step": 13213 + }, + { + "epoch": 0.8931999459240233, + "grad_norm": 1.1026149988174438, + "learning_rate": 8.979768708565323e-07, + "loss": 0.17547607421875, + "step": 13214 + }, + { + "epoch": 0.8932675408949574, + "grad_norm": 0.5869408249855042, + "learning_rate": 8.968527235587253e-07, + "loss": 0.08252716064453125, + "step": 13215 + }, + { + "epoch": 0.8933351358658915, + "grad_norm": 0.5234887599945068, + "learning_rate": 8.9572925865889e-07, + "loss": 0.0667724609375, + "step": 13216 + }, + { + "epoch": 0.8934027308368258, + "grad_norm": 0.7890082597732544, + "learning_rate": 8.946064762113859e-07, + "loss": 0.1198883056640625, + "step": 13217 + }, + { + "epoch": 0.8934703258077599, + "grad_norm": 0.6123899817466736, + "learning_rate": 8.934843762705447e-07, + "loss": 0.1309814453125, + "step": 13218 + }, + { + "epoch": 0.8935379207786941, + "grad_norm": 0.8838256597518921, + "learning_rate": 8.923629588906529e-07, + "loss": 0.11208343505859375, + "step": 13219 + }, + { + "epoch": 0.8936055157496282, + "grad_norm": 0.46128836274147034, + "learning_rate": 8.912422241259771e-07, + "loss": 0.08341598510742188, + "step": 13220 + }, + { + "epoch": 0.8936731107205624, + "grad_norm": 0.6178842186927795, + "learning_rate": 8.901221720307439e-07, + "loss": 0.13256072998046875, + "step": 13221 + }, + { + "epoch": 0.8937407056914966, + "grad_norm": 0.41545596718788147, + "learning_rate": 8.890028026591468e-07, + "loss": 0.07306671142578125, + "step": 13222 + }, + { + "epoch": 0.8938083006624307, + "grad_norm": 0.9326785206794739, + "learning_rate": 8.878841160653523e-07, + "loss": 0.17572021484375, + "step": 13223 + }, + { + "epoch": 0.8938758956333649, + "grad_norm": 0.6652302742004395, + "learning_rate": 8.867661123034837e-07, + "loss": 0.1052093505859375, + "step": 13224 + }, + { + "epoch": 0.893943490604299, + "grad_norm": 0.4825652539730072, + "learning_rate": 8.856487914276412e-07, + "loss": 0.08404731750488281, + "step": 13225 + }, + { + "epoch": 0.8940110855752332, + "grad_norm": 0.8665629625320435, + "learning_rate": 8.845321534918866e-07, + "loss": 0.1179656982421875, + "step": 13226 + }, + { + "epoch": 0.8940786805461673, + "grad_norm": 0.8980771899223328, + "learning_rate": 8.834161985502498e-07, + "loss": 0.1229705810546875, + "step": 13227 + }, + { + "epoch": 0.8941462755171016, + "grad_norm": 0.45190560817718506, + "learning_rate": 8.823009266567261e-07, + "loss": 0.0894317626953125, + "step": 13228 + }, + { + "epoch": 0.8942138704880357, + "grad_norm": 0.540160596370697, + "learning_rate": 8.811863378652824e-07, + "loss": 0.07757186889648438, + "step": 13229 + }, + { + "epoch": 0.8942814654589698, + "grad_norm": 0.8589622378349304, + "learning_rate": 8.80072432229847e-07, + "loss": 0.11308670043945312, + "step": 13230 + }, + { + "epoch": 0.894349060429904, + "grad_norm": 0.783149242401123, + "learning_rate": 8.78959209804317e-07, + "loss": 0.1335601806640625, + "step": 13231 + }, + { + "epoch": 0.8944166554008381, + "grad_norm": 0.8905947208404541, + "learning_rate": 8.778466706425592e-07, + "loss": 0.1199951171875, + "step": 13232 + }, + { + "epoch": 0.8944842503717724, + "grad_norm": 0.537825345993042, + "learning_rate": 8.767348147984039e-07, + "loss": 0.080963134765625, + "step": 13233 + }, + { + "epoch": 0.8945518453427065, + "grad_norm": 1.161291480064392, + "learning_rate": 8.756236423256514e-07, + "loss": 0.189605712890625, + "step": 13234 + }, + { + "epoch": 0.8946194403136407, + "grad_norm": 0.5103653073310852, + "learning_rate": 8.74513153278062e-07, + "loss": 0.05777740478515625, + "step": 13235 + }, + { + "epoch": 0.8946870352845748, + "grad_norm": 1.5169049501419067, + "learning_rate": 8.734033477093762e-07, + "loss": 0.15485382080078125, + "step": 13236 + }, + { + "epoch": 0.8947546302555089, + "grad_norm": 0.608042299747467, + "learning_rate": 8.722942256732841e-07, + "loss": 0.107086181640625, + "step": 13237 + }, + { + "epoch": 0.8948222252264432, + "grad_norm": 1.8113136291503906, + "learning_rate": 8.711857872234597e-07, + "loss": 0.17071533203125, + "step": 13238 + }, + { + "epoch": 0.8948898201973773, + "grad_norm": 0.19546568393707275, + "learning_rate": 8.700780324135332e-07, + "loss": 0.024211883544921875, + "step": 13239 + }, + { + "epoch": 0.8949574151683115, + "grad_norm": 0.4365512430667877, + "learning_rate": 8.689709612971037e-07, + "loss": 0.08104705810546875, + "step": 13240 + }, + { + "epoch": 0.8950250101392456, + "grad_norm": 0.27810633182525635, + "learning_rate": 8.678645739277396e-07, + "loss": 0.021019935607910156, + "step": 13241 + }, + { + "epoch": 0.8950926051101799, + "grad_norm": 0.929369330406189, + "learning_rate": 8.667588703589735e-07, + "loss": 0.1443939208984375, + "step": 13242 + }, + { + "epoch": 0.895160200081114, + "grad_norm": 0.3246736228466034, + "learning_rate": 8.656538506443074e-07, + "loss": 0.04482269287109375, + "step": 13243 + }, + { + "epoch": 0.8952277950520481, + "grad_norm": 0.6205549836158752, + "learning_rate": 8.645495148372084e-07, + "loss": 0.10584259033203125, + "step": 13244 + }, + { + "epoch": 0.8952953900229823, + "grad_norm": 0.38089293241500854, + "learning_rate": 8.634458629911124e-07, + "loss": 0.047061920166015625, + "step": 13245 + }, + { + "epoch": 0.8953629849939164, + "grad_norm": 0.9328065514564514, + "learning_rate": 8.623428951594164e-07, + "loss": 0.128997802734375, + "step": 13246 + }, + { + "epoch": 0.8954305799648506, + "grad_norm": 0.9223230481147766, + "learning_rate": 8.612406113954962e-07, + "loss": 0.1462554931640625, + "step": 13247 + }, + { + "epoch": 0.8954981749357848, + "grad_norm": 0.4742869436740875, + "learning_rate": 8.601390117526809e-07, + "loss": 0.07733917236328125, + "step": 13248 + }, + { + "epoch": 0.895565769906719, + "grad_norm": 1.2895532846450806, + "learning_rate": 8.590380962842776e-07, + "loss": 0.205291748046875, + "step": 13249 + }, + { + "epoch": 0.8956333648776531, + "grad_norm": 0.6418507695198059, + "learning_rate": 8.579378650435487e-07, + "loss": 0.1067962646484375, + "step": 13250 + }, + { + "epoch": 0.8957009598485872, + "grad_norm": 1.0860439538955688, + "learning_rate": 8.568383180837369e-07, + "loss": 0.146881103515625, + "step": 13251 + }, + { + "epoch": 0.8957685548195214, + "grad_norm": 0.6222203969955444, + "learning_rate": 8.557394554580428e-07, + "loss": 0.102935791015625, + "step": 13252 + }, + { + "epoch": 0.8958361497904556, + "grad_norm": 0.5729790925979614, + "learning_rate": 8.546412772196372e-07, + "loss": 0.07289886474609375, + "step": 13253 + }, + { + "epoch": 0.8959037447613898, + "grad_norm": 0.8487589955329895, + "learning_rate": 8.535437834216541e-07, + "loss": 0.1683349609375, + "step": 13254 + }, + { + "epoch": 0.8959713397323239, + "grad_norm": 0.5672615766525269, + "learning_rate": 8.52446974117198e-07, + "loss": 0.08954620361328125, + "step": 13255 + }, + { + "epoch": 0.8960389347032581, + "grad_norm": 0.34497562050819397, + "learning_rate": 8.513508493593414e-07, + "loss": 0.064208984375, + "step": 13256 + }, + { + "epoch": 0.8961065296741922, + "grad_norm": 1.0485327243804932, + "learning_rate": 8.5025540920112e-07, + "loss": 0.1331787109375, + "step": 13257 + }, + { + "epoch": 0.8961741246451264, + "grad_norm": 0.2865007817745209, + "learning_rate": 8.491606536955382e-07, + "loss": 0.039905548095703125, + "step": 13258 + }, + { + "epoch": 0.8962417196160606, + "grad_norm": 0.38282403349876404, + "learning_rate": 8.480665828955653e-07, + "loss": 0.07555007934570312, + "step": 13259 + }, + { + "epoch": 0.8963093145869947, + "grad_norm": 0.5546422600746155, + "learning_rate": 8.469731968541439e-07, + "loss": 0.132904052734375, + "step": 13260 + }, + { + "epoch": 0.8963769095579289, + "grad_norm": 0.45276957750320435, + "learning_rate": 8.458804956241734e-07, + "loss": 0.0770721435546875, + "step": 13261 + }, + { + "epoch": 0.896444504528863, + "grad_norm": 0.2879047095775604, + "learning_rate": 8.447884792585281e-07, + "loss": 0.040660858154296875, + "step": 13262 + }, + { + "epoch": 0.8965120994997973, + "grad_norm": 0.5223404765129089, + "learning_rate": 8.436971478100475e-07, + "loss": 0.07000732421875, + "step": 13263 + }, + { + "epoch": 0.8965796944707314, + "grad_norm": 1.270043134689331, + "learning_rate": 8.426065013315377e-07, + "loss": 0.18084716796875, + "step": 13264 + }, + { + "epoch": 0.8966472894416655, + "grad_norm": 0.9358799457550049, + "learning_rate": 8.415165398757696e-07, + "loss": 0.13418197631835938, + "step": 13265 + }, + { + "epoch": 0.8967148844125997, + "grad_norm": 0.7788902521133423, + "learning_rate": 8.404272634954779e-07, + "loss": 0.09767913818359375, + "step": 13266 + }, + { + "epoch": 0.8967824793835338, + "grad_norm": 0.8662800192832947, + "learning_rate": 8.393386722433755e-07, + "loss": 0.1555938720703125, + "step": 13267 + }, + { + "epoch": 0.8968500743544681, + "grad_norm": 0.7425767779350281, + "learning_rate": 8.382507661721317e-07, + "loss": 0.09762954711914062, + "step": 13268 + }, + { + "epoch": 0.8969176693254022, + "grad_norm": 0.7194715738296509, + "learning_rate": 8.371635453343862e-07, + "loss": 0.1446990966796875, + "step": 13269 + }, + { + "epoch": 0.8969852642963364, + "grad_norm": 0.48375341296195984, + "learning_rate": 8.360770097827469e-07, + "loss": 0.08290481567382812, + "step": 13270 + }, + { + "epoch": 0.8970528592672705, + "grad_norm": 1.1868146657943726, + "learning_rate": 8.349911595697852e-07, + "loss": 0.1728515625, + "step": 13271 + }, + { + "epoch": 0.8971204542382046, + "grad_norm": 0.6392845511436462, + "learning_rate": 8.339059947480421e-07, + "loss": 0.1254730224609375, + "step": 13272 + }, + { + "epoch": 0.8971880492091389, + "grad_norm": 0.4995144307613373, + "learning_rate": 8.328215153700291e-07, + "loss": 0.0828857421875, + "step": 13273 + }, + { + "epoch": 0.897255644180073, + "grad_norm": 0.8896345496177673, + "learning_rate": 8.31737721488211e-07, + "loss": 0.122833251953125, + "step": 13274 + }, + { + "epoch": 0.8973232391510072, + "grad_norm": 2.1811673641204834, + "learning_rate": 8.306546131550341e-07, + "loss": 0.2008056640625, + "step": 13275 + }, + { + "epoch": 0.8973908341219413, + "grad_norm": 0.8795966506004333, + "learning_rate": 8.295721904229098e-07, + "loss": 0.14360809326171875, + "step": 13276 + }, + { + "epoch": 0.8974584290928755, + "grad_norm": 0.8134253621101379, + "learning_rate": 8.284904533442028e-07, + "loss": 0.143890380859375, + "step": 13277 + }, + { + "epoch": 0.8975260240638097, + "grad_norm": 1.0733628273010254, + "learning_rate": 8.274094019712631e-07, + "loss": 0.09521102905273438, + "step": 13278 + }, + { + "epoch": 0.8975936190347438, + "grad_norm": 0.6098222136497498, + "learning_rate": 8.263290363563902e-07, + "loss": 0.10034942626953125, + "step": 13279 + }, + { + "epoch": 0.897661214005678, + "grad_norm": 0.7033374309539795, + "learning_rate": 8.252493565518676e-07, + "loss": 0.1243896484375, + "step": 13280 + }, + { + "epoch": 0.8977288089766121, + "grad_norm": 0.4794938564300537, + "learning_rate": 8.241703626099317e-07, + "loss": 0.09183120727539062, + "step": 13281 + }, + { + "epoch": 0.8977964039475463, + "grad_norm": 0.674366295337677, + "learning_rate": 8.230920545827924e-07, + "loss": 0.1086578369140625, + "step": 13282 + }, + { + "epoch": 0.8978639989184805, + "grad_norm": 0.9914454817771912, + "learning_rate": 8.220144325226248e-07, + "loss": 0.09857177734375, + "step": 13283 + }, + { + "epoch": 0.8979315938894147, + "grad_norm": 0.6334611773490906, + "learning_rate": 8.209374964815702e-07, + "loss": 0.08997344970703125, + "step": 13284 + }, + { + "epoch": 0.8979991888603488, + "grad_norm": 0.3819732964038849, + "learning_rate": 8.198612465117372e-07, + "loss": 0.06388092041015625, + "step": 13285 + }, + { + "epoch": 0.8980667838312829, + "grad_norm": 0.7792873382568359, + "learning_rate": 8.187856826652057e-07, + "loss": 0.122283935546875, + "step": 13286 + }, + { + "epoch": 0.8981343788022171, + "grad_norm": 0.2946110963821411, + "learning_rate": 8.177108049940108e-07, + "loss": 0.05319786071777344, + "step": 13287 + }, + { + "epoch": 0.8982019737731513, + "grad_norm": 0.3641682267189026, + "learning_rate": 8.166366135501691e-07, + "loss": 0.0636444091796875, + "step": 13288 + }, + { + "epoch": 0.8982695687440855, + "grad_norm": 0.4422146677970886, + "learning_rate": 8.155631083856541e-07, + "loss": 0.0824432373046875, + "step": 13289 + }, + { + "epoch": 0.8983371637150196, + "grad_norm": 0.4084430932998657, + "learning_rate": 8.144902895524042e-07, + "loss": 0.0644989013671875, + "step": 13290 + }, + { + "epoch": 0.8984047586859538, + "grad_norm": 0.683455228805542, + "learning_rate": 8.134181571023363e-07, + "loss": 0.10453033447265625, + "step": 13291 + }, + { + "epoch": 0.8984723536568879, + "grad_norm": 0.7249211668968201, + "learning_rate": 8.123467110873223e-07, + "loss": 0.11145782470703125, + "step": 13292 + }, + { + "epoch": 0.898539948627822, + "grad_norm": 0.6204757690429688, + "learning_rate": 8.112759515592055e-07, + "loss": 0.08285140991210938, + "step": 13293 + }, + { + "epoch": 0.8986075435987563, + "grad_norm": 1.5804823637008667, + "learning_rate": 8.10205878569798e-07, + "loss": 0.199493408203125, + "step": 13294 + }, + { + "epoch": 0.8986751385696904, + "grad_norm": 0.9529973864555359, + "learning_rate": 8.09136492170875e-07, + "loss": 0.11046409606933594, + "step": 13295 + }, + { + "epoch": 0.8987427335406246, + "grad_norm": 1.3380509614944458, + "learning_rate": 8.080677924141816e-07, + "loss": 0.219512939453125, + "step": 13296 + }, + { + "epoch": 0.8988103285115587, + "grad_norm": 0.5465871691703796, + "learning_rate": 8.069997793514267e-07, + "loss": 0.0704193115234375, + "step": 13297 + }, + { + "epoch": 0.898877923482493, + "grad_norm": 0.559325635433197, + "learning_rate": 8.059324530342854e-07, + "loss": 0.077484130859375, + "step": 13298 + }, + { + "epoch": 0.8989455184534271, + "grad_norm": 0.5331661701202393, + "learning_rate": 8.048658135144066e-07, + "loss": 0.0885772705078125, + "step": 13299 + }, + { + "epoch": 0.8990131134243612, + "grad_norm": 1.3558285236358643, + "learning_rate": 8.037998608433988e-07, + "loss": 0.2104949951171875, + "step": 13300 + }, + { + "epoch": 0.8990807083952954, + "grad_norm": 0.28789249062538147, + "learning_rate": 8.02734595072836e-07, + "loss": 0.05438995361328125, + "step": 13301 + }, + { + "epoch": 0.8991483033662295, + "grad_norm": 0.9795324206352234, + "learning_rate": 8.016700162542683e-07, + "loss": 0.1409759521484375, + "step": 13302 + }, + { + "epoch": 0.8992158983371638, + "grad_norm": 0.3111892640590668, + "learning_rate": 8.006061244392016e-07, + "loss": 0.04015350341796875, + "step": 13303 + }, + { + "epoch": 0.8992834933080979, + "grad_norm": 0.34259501099586487, + "learning_rate": 7.995429196791159e-07, + "loss": 0.0562591552734375, + "step": 13304 + }, + { + "epoch": 0.899351088279032, + "grad_norm": 1.1416581869125366, + "learning_rate": 7.984804020254555e-07, + "loss": 0.2000732421875, + "step": 13305 + }, + { + "epoch": 0.8994186832499662, + "grad_norm": 1.0310542583465576, + "learning_rate": 7.974185715296322e-07, + "loss": 0.167694091796875, + "step": 13306 + }, + { + "epoch": 0.8994862782209003, + "grad_norm": 1.2695380449295044, + "learning_rate": 7.963574282430236e-07, + "loss": 0.1454925537109375, + "step": 13307 + }, + { + "epoch": 0.8995538731918346, + "grad_norm": 1.0256710052490234, + "learning_rate": 7.952969722169751e-07, + "loss": 0.16754150390625, + "step": 13308 + }, + { + "epoch": 0.8996214681627687, + "grad_norm": 1.0473490953445435, + "learning_rate": 7.942372035027939e-07, + "loss": 0.1801605224609375, + "step": 13309 + }, + { + "epoch": 0.8996890631337029, + "grad_norm": 1.0224257707595825, + "learning_rate": 7.931781221517642e-07, + "loss": 0.171112060546875, + "step": 13310 + }, + { + "epoch": 0.899756658104637, + "grad_norm": 0.6574631929397583, + "learning_rate": 7.921197282151283e-07, + "loss": 0.08034896850585938, + "step": 13311 + }, + { + "epoch": 0.8998242530755711, + "grad_norm": 0.34629908204078674, + "learning_rate": 7.910620217440966e-07, + "loss": 0.0534820556640625, + "step": 13312 + }, + { + "epoch": 0.8998918480465053, + "grad_norm": 0.7944788932800293, + "learning_rate": 7.900050027898503e-07, + "loss": 0.125030517578125, + "step": 13313 + }, + { + "epoch": 0.8999594430174395, + "grad_norm": 0.6500458717346191, + "learning_rate": 7.889486714035299e-07, + "loss": 0.10634613037109375, + "step": 13314 + }, + { + "epoch": 0.9000270379883737, + "grad_norm": 0.7877338528633118, + "learning_rate": 7.878930276362545e-07, + "loss": 0.14141845703125, + "step": 13315 + }, + { + "epoch": 0.9000946329593078, + "grad_norm": 0.28229013085365295, + "learning_rate": 7.868380715390932e-07, + "loss": 0.05255126953125, + "step": 13316 + }, + { + "epoch": 0.900162227930242, + "grad_norm": 0.9754597544670105, + "learning_rate": 7.857838031631004e-07, + "loss": 0.158905029296875, + "step": 13317 + }, + { + "epoch": 0.9002298229011761, + "grad_norm": 0.8863787651062012, + "learning_rate": 7.847302225592834e-07, + "loss": 0.12108039855957031, + "step": 13318 + }, + { + "epoch": 0.9002974178721103, + "grad_norm": 0.891253650188446, + "learning_rate": 7.836773297786231e-07, + "loss": 0.13222503662109375, + "step": 13319 + }, + { + "epoch": 0.9003650128430445, + "grad_norm": 0.7250775098800659, + "learning_rate": 7.826251248720639e-07, + "loss": 0.08795547485351562, + "step": 13320 + }, + { + "epoch": 0.9004326078139786, + "grad_norm": 0.32144442200660706, + "learning_rate": 7.815736078905167e-07, + "loss": 0.0611419677734375, + "step": 13321 + }, + { + "epoch": 0.9005002027849128, + "grad_norm": 1.5394366979599, + "learning_rate": 7.805227788848607e-07, + "loss": 0.1779632568359375, + "step": 13322 + }, + { + "epoch": 0.9005677977558469, + "grad_norm": 0.3259965777397156, + "learning_rate": 7.794726379059469e-07, + "loss": 0.055179595947265625, + "step": 13323 + }, + { + "epoch": 0.9006353927267812, + "grad_norm": 0.6056432127952576, + "learning_rate": 7.784231850045815e-07, + "loss": 0.12828826904296875, + "step": 13324 + }, + { + "epoch": 0.9007029876977153, + "grad_norm": 0.24082036316394806, + "learning_rate": 7.773744202315453e-07, + "loss": 0.030466079711914062, + "step": 13325 + }, + { + "epoch": 0.9007705826686494, + "grad_norm": 0.3165663480758667, + "learning_rate": 7.763263436375845e-07, + "loss": 0.061309814453125, + "step": 13326 + }, + { + "epoch": 0.9008381776395836, + "grad_norm": 0.42875465750694275, + "learning_rate": 7.752789552734085e-07, + "loss": 0.08941268920898438, + "step": 13327 + }, + { + "epoch": 0.9009057726105177, + "grad_norm": 0.5762291550636292, + "learning_rate": 7.742322551897036e-07, + "loss": 0.08399200439453125, + "step": 13328 + }, + { + "epoch": 0.900973367581452, + "grad_norm": 1.168309211730957, + "learning_rate": 7.731862434371074e-07, + "loss": 0.13016128540039062, + "step": 13329 + }, + { + "epoch": 0.9010409625523861, + "grad_norm": 1.3900734186172485, + "learning_rate": 7.721409200662377e-07, + "loss": 0.2191162109375, + "step": 13330 + }, + { + "epoch": 0.9011085575233203, + "grad_norm": 0.7748521566390991, + "learning_rate": 7.710962851276726e-07, + "loss": 0.107147216796875, + "step": 13331 + }, + { + "epoch": 0.9011761524942544, + "grad_norm": 0.20402519404888153, + "learning_rate": 7.700523386719583e-07, + "loss": 0.023693084716796875, + "step": 13332 + }, + { + "epoch": 0.9012437474651885, + "grad_norm": 0.8511567711830139, + "learning_rate": 7.690090807496075e-07, + "loss": 0.148345947265625, + "step": 13333 + }, + { + "epoch": 0.9013113424361228, + "grad_norm": 0.25275155901908875, + "learning_rate": 7.679665114110984e-07, + "loss": 0.03687095642089844, + "step": 13334 + }, + { + "epoch": 0.9013789374070569, + "grad_norm": 0.517930269241333, + "learning_rate": 7.669246307068773e-07, + "loss": 0.081939697265625, + "step": 13335 + }, + { + "epoch": 0.9014465323779911, + "grad_norm": 0.723676860332489, + "learning_rate": 7.65883438687357e-07, + "loss": 0.0928192138671875, + "step": 13336 + }, + { + "epoch": 0.9015141273489252, + "grad_norm": 0.6132466197013855, + "learning_rate": 7.648429354029173e-07, + "loss": 0.10860443115234375, + "step": 13337 + }, + { + "epoch": 0.9015817223198594, + "grad_norm": 0.40248799324035645, + "learning_rate": 7.638031209039015e-07, + "loss": 0.0767822265625, + "step": 13338 + }, + { + "epoch": 0.9016493172907936, + "grad_norm": 0.5144125819206238, + "learning_rate": 7.62763995240629e-07, + "loss": 0.08737945556640625, + "step": 13339 + }, + { + "epoch": 0.9017169122617277, + "grad_norm": 0.575108528137207, + "learning_rate": 7.617255584633698e-07, + "loss": 0.0941314697265625, + "step": 13340 + }, + { + "epoch": 0.9017845072326619, + "grad_norm": 0.7192500233650208, + "learning_rate": 7.606878106223802e-07, + "loss": 0.1311798095703125, + "step": 13341 + }, + { + "epoch": 0.901852102203596, + "grad_norm": 1.086145043373108, + "learning_rate": 7.596507517678636e-07, + "loss": 0.12701416015625, + "step": 13342 + }, + { + "epoch": 0.9019196971745302, + "grad_norm": 0.39609822630882263, + "learning_rate": 7.586143819500046e-07, + "loss": 0.06995391845703125, + "step": 13343 + }, + { + "epoch": 0.9019872921454644, + "grad_norm": 0.22081975638866425, + "learning_rate": 7.575787012189484e-07, + "loss": 0.028606414794921875, + "step": 13344 + }, + { + "epoch": 0.9020548871163986, + "grad_norm": 0.2738274037837982, + "learning_rate": 7.565437096248078e-07, + "loss": 0.054447174072265625, + "step": 13345 + }, + { + "epoch": 0.9021224820873327, + "grad_norm": 0.3218607008457184, + "learning_rate": 7.555094072176616e-07, + "loss": 0.051090240478515625, + "step": 13346 + }, + { + "epoch": 0.9021900770582668, + "grad_norm": 1.0900979042053223, + "learning_rate": 7.544757940475544e-07, + "loss": 0.16388702392578125, + "step": 13347 + }, + { + "epoch": 0.902257672029201, + "grad_norm": 0.46031665802001953, + "learning_rate": 7.534428701645014e-07, + "loss": 0.0799713134765625, + "step": 13348 + }, + { + "epoch": 0.9023252670001352, + "grad_norm": 0.4988730549812317, + "learning_rate": 7.524106356184807e-07, + "loss": 0.081695556640625, + "step": 13349 + }, + { + "epoch": 0.9023928619710694, + "grad_norm": 0.38380706310272217, + "learning_rate": 7.513790904594392e-07, + "loss": 0.05675506591796875, + "step": 13350 + }, + { + "epoch": 0.9024604569420035, + "grad_norm": 1.288555383682251, + "learning_rate": 7.503482347372853e-07, + "loss": 0.23291015625, + "step": 13351 + }, + { + "epoch": 0.9025280519129377, + "grad_norm": 0.9789237976074219, + "learning_rate": 7.493180685019057e-07, + "loss": 0.1497955322265625, + "step": 13352 + }, + { + "epoch": 0.9025956468838718, + "grad_norm": 0.5410807132720947, + "learning_rate": 7.482885918031374e-07, + "loss": 0.0997314453125, + "step": 13353 + }, + { + "epoch": 0.902663241854806, + "grad_norm": 0.6525492668151855, + "learning_rate": 7.472598046908003e-07, + "loss": 0.09755134582519531, + "step": 13354 + }, + { + "epoch": 0.9027308368257402, + "grad_norm": 0.4131160378456116, + "learning_rate": 7.462317072146713e-07, + "loss": 0.073028564453125, + "step": 13355 + }, + { + "epoch": 0.9027984317966743, + "grad_norm": 0.6256008744239807, + "learning_rate": 7.45204299424494e-07, + "loss": 0.116912841796875, + "step": 13356 + }, + { + "epoch": 0.9028660267676085, + "grad_norm": 0.4135212004184723, + "learning_rate": 7.441775813699836e-07, + "loss": 0.0786590576171875, + "step": 13357 + }, + { + "epoch": 0.9029336217385426, + "grad_norm": 0.7122069597244263, + "learning_rate": 7.431515531008171e-07, + "loss": 0.12465286254882812, + "step": 13358 + }, + { + "epoch": 0.9030012167094769, + "grad_norm": 1.1752369403839111, + "learning_rate": 7.421262146666414e-07, + "loss": 0.13258743286132812, + "step": 13359 + }, + { + "epoch": 0.903068811680411, + "grad_norm": 0.31964677572250366, + "learning_rate": 7.411015661170667e-07, + "loss": 0.0654296875, + "step": 13360 + }, + { + "epoch": 0.9031364066513451, + "grad_norm": 0.7103529572486877, + "learning_rate": 7.400776075016752e-07, + "loss": 0.12279510498046875, + "step": 13361 + }, + { + "epoch": 0.9032040016222793, + "grad_norm": 0.7221530079841614, + "learning_rate": 7.390543388700088e-07, + "loss": 0.1629180908203125, + "step": 13362 + }, + { + "epoch": 0.9032715965932134, + "grad_norm": 0.9834632277488708, + "learning_rate": 7.38031760271583e-07, + "loss": 0.156036376953125, + "step": 13363 + }, + { + "epoch": 0.9033391915641477, + "grad_norm": 0.45168131589889526, + "learning_rate": 7.370098717558716e-07, + "loss": 0.086517333984375, + "step": 13364 + }, + { + "epoch": 0.9034067865350818, + "grad_norm": 1.5645357370376587, + "learning_rate": 7.359886733723281e-07, + "loss": 0.15502166748046875, + "step": 13365 + }, + { + "epoch": 0.903474381506016, + "grad_norm": 0.8622766733169556, + "learning_rate": 7.349681651703549e-07, + "loss": 0.11473464965820312, + "step": 13366 + }, + { + "epoch": 0.9035419764769501, + "grad_norm": 0.47077804803848267, + "learning_rate": 7.339483471993375e-07, + "loss": 0.07842254638671875, + "step": 13367 + }, + { + "epoch": 0.9036095714478842, + "grad_norm": 1.4101108312606812, + "learning_rate": 7.329292195086196e-07, + "loss": 0.18475341796875, + "step": 13368 + }, + { + "epoch": 0.9036771664188185, + "grad_norm": 0.27499860525131226, + "learning_rate": 7.319107821475101e-07, + "loss": 0.05646514892578125, + "step": 13369 + }, + { + "epoch": 0.9037447613897526, + "grad_norm": 1.3359397649765015, + "learning_rate": 7.308930351652932e-07, + "loss": 0.2020263671875, + "step": 13370 + }, + { + "epoch": 0.9038123563606868, + "grad_norm": 0.9006240367889404, + "learning_rate": 7.298759786112041e-07, + "loss": 0.09528350830078125, + "step": 13371 + }, + { + "epoch": 0.9038799513316209, + "grad_norm": 0.8669939041137695, + "learning_rate": 7.288596125344638e-07, + "loss": 0.140380859375, + "step": 13372 + }, + { + "epoch": 0.9039475463025551, + "grad_norm": 0.9496890306472778, + "learning_rate": 7.27843936984246e-07, + "loss": 0.192596435546875, + "step": 13373 + }, + { + "epoch": 0.9040151412734893, + "grad_norm": 0.5025438070297241, + "learning_rate": 7.268289520096949e-07, + "loss": 0.0912322998046875, + "step": 13374 + }, + { + "epoch": 0.9040827362444234, + "grad_norm": 0.41891464591026306, + "learning_rate": 7.258146576599244e-07, + "loss": 0.064208984375, + "step": 13375 + }, + { + "epoch": 0.9041503312153576, + "grad_norm": 1.1090539693832397, + "learning_rate": 7.248010539840105e-07, + "loss": 0.1827239990234375, + "step": 13376 + }, + { + "epoch": 0.9042179261862917, + "grad_norm": 0.22522875666618347, + "learning_rate": 7.237881410309971e-07, + "loss": 0.032093048095703125, + "step": 13377 + }, + { + "epoch": 0.9042855211572259, + "grad_norm": 0.34555691480636597, + "learning_rate": 7.227759188498983e-07, + "loss": 0.047061920166015625, + "step": 13378 + }, + { + "epoch": 0.90435311612816, + "grad_norm": 0.8753848075866699, + "learning_rate": 7.217643874896868e-07, + "loss": 0.10666656494140625, + "step": 13379 + }, + { + "epoch": 0.9044207110990943, + "grad_norm": 0.23359663784503937, + "learning_rate": 7.207535469993115e-07, + "loss": 0.030378341674804688, + "step": 13380 + }, + { + "epoch": 0.9044883060700284, + "grad_norm": 0.9881435036659241, + "learning_rate": 7.197433974276835e-07, + "loss": 0.12150955200195312, + "step": 13381 + }, + { + "epoch": 0.9045559010409625, + "grad_norm": 1.1520839929580688, + "learning_rate": 7.187339388236735e-07, + "loss": 0.1862640380859375, + "step": 13382 + }, + { + "epoch": 0.9046234960118967, + "grad_norm": 0.7556099891662598, + "learning_rate": 7.177251712361326e-07, + "loss": 0.146636962890625, + "step": 13383 + }, + { + "epoch": 0.9046910909828308, + "grad_norm": 1.103463888168335, + "learning_rate": 7.167170947138685e-07, + "loss": 0.160186767578125, + "step": 13384 + }, + { + "epoch": 0.9047586859537651, + "grad_norm": 0.7604984641075134, + "learning_rate": 7.15709709305657e-07, + "loss": 0.12591552734375, + "step": 13385 + }, + { + "epoch": 0.9048262809246992, + "grad_norm": 0.8725796937942505, + "learning_rate": 7.147030150602424e-07, + "loss": 0.13861083984375, + "step": 13386 + }, + { + "epoch": 0.9048938758956334, + "grad_norm": 0.8844534754753113, + "learning_rate": 7.136970120263375e-07, + "loss": 0.1400146484375, + "step": 13387 + }, + { + "epoch": 0.9049614708665675, + "grad_norm": 0.8063991665840149, + "learning_rate": 7.126917002526151e-07, + "loss": 0.09929656982421875, + "step": 13388 + }, + { + "epoch": 0.9050290658375016, + "grad_norm": 0.7406164407730103, + "learning_rate": 7.116870797877195e-07, + "loss": 0.10247278213500977, + "step": 13389 + }, + { + "epoch": 0.9050966608084359, + "grad_norm": 0.6915115714073181, + "learning_rate": 7.106831506802602e-07, + "loss": 0.10540771484375, + "step": 13390 + }, + { + "epoch": 0.90516425577937, + "grad_norm": 0.2127971649169922, + "learning_rate": 7.096799129788184e-07, + "loss": 0.031673431396484375, + "step": 13391 + }, + { + "epoch": 0.9052318507503042, + "grad_norm": 1.148668885231018, + "learning_rate": 7.086773667319285e-07, + "loss": 0.18475341796875, + "step": 13392 + }, + { + "epoch": 0.9052994457212383, + "grad_norm": 0.2597537934780121, + "learning_rate": 7.076755119881067e-07, + "loss": 0.040531158447265625, + "step": 13393 + }, + { + "epoch": 0.9053670406921726, + "grad_norm": 0.9139378070831299, + "learning_rate": 7.066743487958278e-07, + "loss": 0.15667724609375, + "step": 13394 + }, + { + "epoch": 0.9054346356631067, + "grad_norm": 1.060473918914795, + "learning_rate": 7.056738772035293e-07, + "loss": 0.148834228515625, + "step": 13395 + }, + { + "epoch": 0.9055022306340408, + "grad_norm": 0.8941632509231567, + "learning_rate": 7.046740972596277e-07, + "loss": 0.2078399658203125, + "step": 13396 + }, + { + "epoch": 0.905569825604975, + "grad_norm": 0.3203384578227997, + "learning_rate": 7.036750090124927e-07, + "loss": 0.0823516845703125, + "step": 13397 + }, + { + "epoch": 0.9056374205759091, + "grad_norm": 1.4965733289718628, + "learning_rate": 7.026766125104706e-07, + "loss": 0.191650390625, + "step": 13398 + }, + { + "epoch": 0.9057050155468434, + "grad_norm": 0.6693249344825745, + "learning_rate": 7.016789078018676e-07, + "loss": 0.14862060546875, + "step": 13399 + }, + { + "epoch": 0.9057726105177775, + "grad_norm": 0.19867560267448425, + "learning_rate": 7.006818949349586e-07, + "loss": 0.027757644653320312, + "step": 13400 + }, + { + "epoch": 0.9058402054887117, + "grad_norm": 0.4703410863876343, + "learning_rate": 6.99685573957985e-07, + "loss": 0.1062469482421875, + "step": 13401 + }, + { + "epoch": 0.9059078004596458, + "grad_norm": 0.280360609292984, + "learning_rate": 6.98689944919158e-07, + "loss": 0.0593414306640625, + "step": 13402 + }, + { + "epoch": 0.9059753954305799, + "grad_norm": 0.30764240026474, + "learning_rate": 6.976950078666478e-07, + "loss": 0.0481719970703125, + "step": 13403 + }, + { + "epoch": 0.9060429904015141, + "grad_norm": 0.7696714997291565, + "learning_rate": 6.967007628486022e-07, + "loss": 0.1264190673828125, + "step": 13404 + }, + { + "epoch": 0.9061105853724483, + "grad_norm": 0.7139776349067688, + "learning_rate": 6.957072099131229e-07, + "loss": 0.127777099609375, + "step": 13405 + }, + { + "epoch": 0.9061781803433825, + "grad_norm": 1.0559154748916626, + "learning_rate": 6.947143491082846e-07, + "loss": 0.167449951171875, + "step": 13406 + }, + { + "epoch": 0.9062457753143166, + "grad_norm": 0.17756393551826477, + "learning_rate": 6.937221804821342e-07, + "loss": 0.02433013916015625, + "step": 13407 + }, + { + "epoch": 0.9063133702852508, + "grad_norm": 0.5172504186630249, + "learning_rate": 6.927307040826697e-07, + "loss": 0.081268310546875, + "step": 13408 + }, + { + "epoch": 0.906380965256185, + "grad_norm": 1.2790305614471436, + "learning_rate": 6.917399199578711e-07, + "loss": 0.1181483268737793, + "step": 13409 + }, + { + "epoch": 0.9064485602271191, + "grad_norm": 0.33938297629356384, + "learning_rate": 6.907498281556784e-07, + "loss": 0.044254302978515625, + "step": 13410 + }, + { + "epoch": 0.9065161551980533, + "grad_norm": 0.7641958594322205, + "learning_rate": 6.897604287239967e-07, + "loss": 0.11994552612304688, + "step": 13411 + }, + { + "epoch": 0.9065837501689874, + "grad_norm": 0.4362708628177643, + "learning_rate": 6.887717217106993e-07, + "loss": 0.06309127807617188, + "step": 13412 + }, + { + "epoch": 0.9066513451399216, + "grad_norm": 0.4727533161640167, + "learning_rate": 6.877837071636278e-07, + "loss": 0.08512115478515625, + "step": 13413 + }, + { + "epoch": 0.9067189401108557, + "grad_norm": 0.35300588607788086, + "learning_rate": 6.867963851305842e-07, + "loss": 0.0790863037109375, + "step": 13414 + }, + { + "epoch": 0.90678653508179, + "grad_norm": 0.6792633533477783, + "learning_rate": 6.858097556593484e-07, + "loss": 0.10135269165039062, + "step": 13415 + }, + { + "epoch": 0.9068541300527241, + "grad_norm": 0.45521387457847595, + "learning_rate": 6.848238187976541e-07, + "loss": 0.10467529296875, + "step": 13416 + }, + { + "epoch": 0.9069217250236582, + "grad_norm": 0.700046718120575, + "learning_rate": 6.838385745932097e-07, + "loss": 0.0992431640625, + "step": 13417 + }, + { + "epoch": 0.9069893199945924, + "grad_norm": 0.5572392344474792, + "learning_rate": 6.828540230936853e-07, + "loss": 0.08548355102539062, + "step": 13418 + }, + { + "epoch": 0.9070569149655265, + "grad_norm": 2.044879913330078, + "learning_rate": 6.818701643467179e-07, + "loss": 0.280059814453125, + "step": 13419 + }, + { + "epoch": 0.9071245099364608, + "grad_norm": 0.464697003364563, + "learning_rate": 6.808869983999194e-07, + "loss": 0.05950164794921875, + "step": 13420 + }, + { + "epoch": 0.9071921049073949, + "grad_norm": 0.7961733341217041, + "learning_rate": 6.799045253008551e-07, + "loss": 0.132659912109375, + "step": 13421 + }, + { + "epoch": 0.9072596998783291, + "grad_norm": 0.8401325941085815, + "learning_rate": 6.789227450970653e-07, + "loss": 0.0953216552734375, + "step": 13422 + }, + { + "epoch": 0.9073272948492632, + "grad_norm": 1.479296326637268, + "learning_rate": 6.77941657836057e-07, + "loss": 0.16574859619140625, + "step": 13423 + }, + { + "epoch": 0.9073948898201973, + "grad_norm": 0.6060823798179626, + "learning_rate": 6.769612635652989e-07, + "loss": 0.07834529876708984, + "step": 13424 + }, + { + "epoch": 0.9074624847911316, + "grad_norm": 0.2278364896774292, + "learning_rate": 6.759815623322279e-07, + "loss": 0.04000091552734375, + "step": 13425 + }, + { + "epoch": 0.9075300797620657, + "grad_norm": 1.280318021774292, + "learning_rate": 6.750025541842497e-07, + "loss": 0.16106414794921875, + "step": 13426 + }, + { + "epoch": 0.9075976747329999, + "grad_norm": 0.8178768754005432, + "learning_rate": 6.740242391687313e-07, + "loss": 0.174072265625, + "step": 13427 + }, + { + "epoch": 0.907665269703934, + "grad_norm": 0.551310658454895, + "learning_rate": 6.73046617333018e-07, + "loss": 0.07036209106445312, + "step": 13428 + }, + { + "epoch": 0.9077328646748682, + "grad_norm": 0.7654678225517273, + "learning_rate": 6.720696887244055e-07, + "loss": 0.1125946044921875, + "step": 13429 + }, + { + "epoch": 0.9078004596458024, + "grad_norm": 1.8857331275939941, + "learning_rate": 6.710934533901625e-07, + "loss": 0.23712158203125, + "step": 13430 + }, + { + "epoch": 0.9078680546167365, + "grad_norm": 0.40013718605041504, + "learning_rate": 6.70117911377533e-07, + "loss": 0.06456756591796875, + "step": 13431 + }, + { + "epoch": 0.9079356495876707, + "grad_norm": 0.4995250999927521, + "learning_rate": 6.691430627337126e-07, + "loss": 0.113067626953125, + "step": 13432 + }, + { + "epoch": 0.9080032445586048, + "grad_norm": 0.4454953074455261, + "learning_rate": 6.681689075058766e-07, + "loss": 0.055027008056640625, + "step": 13433 + }, + { + "epoch": 0.908070839529539, + "grad_norm": 0.1967778354883194, + "learning_rate": 6.671954457411544e-07, + "loss": 0.030168533325195312, + "step": 13434 + }, + { + "epoch": 0.9081384345004732, + "grad_norm": 0.6849767565727234, + "learning_rate": 6.662226774866531e-07, + "loss": 0.1175689697265625, + "step": 13435 + }, + { + "epoch": 0.9082060294714073, + "grad_norm": 0.32245656847953796, + "learning_rate": 6.652506027894384e-07, + "loss": 0.06409835815429688, + "step": 13436 + }, + { + "epoch": 0.9082736244423415, + "grad_norm": 0.33471789956092834, + "learning_rate": 6.642792216965476e-07, + "loss": 0.06099700927734375, + "step": 13437 + }, + { + "epoch": 0.9083412194132756, + "grad_norm": 1.2428500652313232, + "learning_rate": 6.633085342549799e-07, + "loss": 0.1805267333984375, + "step": 13438 + }, + { + "epoch": 0.9084088143842098, + "grad_norm": 0.3914985656738281, + "learning_rate": 6.623385405117061e-07, + "loss": 0.08060455322265625, + "step": 13439 + }, + { + "epoch": 0.908476409355144, + "grad_norm": 0.5418335795402527, + "learning_rate": 6.613692405136568e-07, + "loss": 0.1005859375, + "step": 13440 + }, + { + "epoch": 0.9085440043260782, + "grad_norm": 0.7152999639511108, + "learning_rate": 6.604006343077345e-07, + "loss": 0.12035369873046875, + "step": 13441 + }, + { + "epoch": 0.9086115992970123, + "grad_norm": 0.3127141296863556, + "learning_rate": 6.594327219408086e-07, + "loss": 0.04886627197265625, + "step": 13442 + }, + { + "epoch": 0.9086791942679464, + "grad_norm": 0.7882065773010254, + "learning_rate": 6.584655034597064e-07, + "loss": 0.13216018676757812, + "step": 13443 + }, + { + "epoch": 0.9087467892388806, + "grad_norm": 0.6945440173149109, + "learning_rate": 6.574989789112374e-07, + "loss": 0.133148193359375, + "step": 13444 + }, + { + "epoch": 0.9088143842098148, + "grad_norm": 0.4927682876586914, + "learning_rate": 6.565331483421588e-07, + "loss": 0.08629608154296875, + "step": 13445 + }, + { + "epoch": 0.908881979180749, + "grad_norm": 1.1554361581802368, + "learning_rate": 6.555680117992085e-07, + "loss": 0.1672821044921875, + "step": 13446 + }, + { + "epoch": 0.9089495741516831, + "grad_norm": 1.974887728691101, + "learning_rate": 6.546035693290858e-07, + "loss": 0.13998031616210938, + "step": 13447 + }, + { + "epoch": 0.9090171691226173, + "grad_norm": 1.2256181240081787, + "learning_rate": 6.536398209784567e-07, + "loss": 0.196136474609375, + "step": 13448 + }, + { + "epoch": 0.9090847640935514, + "grad_norm": 0.6446714401245117, + "learning_rate": 6.526767667939504e-07, + "loss": 0.13592529296875, + "step": 13449 + }, + { + "epoch": 0.9091523590644855, + "grad_norm": 0.28044626116752625, + "learning_rate": 6.517144068221664e-07, + "loss": 0.036373138427734375, + "step": 13450 + }, + { + "epoch": 0.9092199540354198, + "grad_norm": 0.5115289092063904, + "learning_rate": 6.507527411096725e-07, + "loss": 0.06968307495117188, + "step": 13451 + }, + { + "epoch": 0.9092875490063539, + "grad_norm": 0.5391450524330139, + "learning_rate": 6.497917697029965e-07, + "loss": 0.06982421875, + "step": 13452 + }, + { + "epoch": 0.9093551439772881, + "grad_norm": 0.6471445560455322, + "learning_rate": 6.488314926486377e-07, + "loss": 0.11322021484375, + "step": 13453 + }, + { + "epoch": 0.9094227389482222, + "grad_norm": 0.8918081521987915, + "learning_rate": 6.478719099930608e-07, + "loss": 0.1099090576171875, + "step": 13454 + }, + { + "epoch": 0.9094903339191565, + "grad_norm": 1.0863757133483887, + "learning_rate": 6.469130217826969e-07, + "loss": 0.1261749267578125, + "step": 13455 + }, + { + "epoch": 0.9095579288900906, + "grad_norm": 0.8004049062728882, + "learning_rate": 6.459548280639388e-07, + "loss": 0.13275146484375, + "step": 13456 + }, + { + "epoch": 0.9096255238610247, + "grad_norm": 0.4992378354072571, + "learning_rate": 6.449973288831562e-07, + "loss": 0.10302734375, + "step": 13457 + }, + { + "epoch": 0.9096931188319589, + "grad_norm": 1.0367310047149658, + "learning_rate": 6.440405242866721e-07, + "loss": 0.1751251220703125, + "step": 13458 + }, + { + "epoch": 0.909760713802893, + "grad_norm": 1.2751837968826294, + "learning_rate": 6.430844143207892e-07, + "loss": 0.2040252685546875, + "step": 13459 + }, + { + "epoch": 0.9098283087738273, + "grad_norm": 1.6845170259475708, + "learning_rate": 6.421289990317658e-07, + "loss": 0.196075439453125, + "step": 13460 + }, + { + "epoch": 0.9098959037447614, + "grad_norm": 1.0569264888763428, + "learning_rate": 6.41174278465833e-07, + "loss": 0.1632843017578125, + "step": 13461 + }, + { + "epoch": 0.9099634987156956, + "grad_norm": 0.8033064603805542, + "learning_rate": 6.402202526691842e-07, + "loss": 0.12560272216796875, + "step": 13462 + }, + { + "epoch": 0.9100310936866297, + "grad_norm": 0.3035460412502289, + "learning_rate": 6.392669216879837e-07, + "loss": 0.0299835205078125, + "step": 13463 + }, + { + "epoch": 0.9100986886575638, + "grad_norm": 0.5028682947158813, + "learning_rate": 6.383142855683566e-07, + "loss": 0.06716156005859375, + "step": 13464 + }, + { + "epoch": 0.910166283628498, + "grad_norm": 0.8408454060554504, + "learning_rate": 6.373623443563992e-07, + "loss": 0.111480712890625, + "step": 13465 + }, + { + "epoch": 0.9102338785994322, + "grad_norm": 0.3782229423522949, + "learning_rate": 6.364110980981713e-07, + "loss": 0.05834197998046875, + "step": 13466 + }, + { + "epoch": 0.9103014735703664, + "grad_norm": 0.7645580172538757, + "learning_rate": 6.354605468397013e-07, + "loss": 0.176025390625, + "step": 13467 + }, + { + "epoch": 0.9103690685413005, + "grad_norm": 0.3320005536079407, + "learning_rate": 6.345106906269821e-07, + "loss": 0.03800773620605469, + "step": 13468 + }, + { + "epoch": 0.9104366635122347, + "grad_norm": 0.6085425019264221, + "learning_rate": 6.335615295059721e-07, + "loss": 0.0823516845703125, + "step": 13469 + }, + { + "epoch": 0.9105042584831688, + "grad_norm": 0.9155243039131165, + "learning_rate": 6.326130635226013e-07, + "loss": 0.1396484375, + "step": 13470 + }, + { + "epoch": 0.910571853454103, + "grad_norm": 0.3447045683860779, + "learning_rate": 6.316652927227579e-07, + "loss": 0.044528961181640625, + "step": 13471 + }, + { + "epoch": 0.9106394484250372, + "grad_norm": 1.2546861171722412, + "learning_rate": 6.307182171523052e-07, + "loss": 0.177825927734375, + "step": 13472 + }, + { + "epoch": 0.9107070433959713, + "grad_norm": 0.890699565410614, + "learning_rate": 6.297718368570665e-07, + "loss": 0.1610260009765625, + "step": 13473 + }, + { + "epoch": 0.9107746383669055, + "grad_norm": 0.3277960419654846, + "learning_rate": 6.288261518828353e-07, + "loss": 0.042758941650390625, + "step": 13474 + }, + { + "epoch": 0.9108422333378396, + "grad_norm": 0.6897342205047607, + "learning_rate": 6.278811622753666e-07, + "loss": 0.11829376220703125, + "step": 13475 + }, + { + "epoch": 0.9109098283087739, + "grad_norm": 0.6891263127326965, + "learning_rate": 6.269368680803872e-07, + "loss": 0.1444854736328125, + "step": 13476 + }, + { + "epoch": 0.910977423279708, + "grad_norm": 0.3216729164123535, + "learning_rate": 6.259932693435854e-07, + "loss": 0.056732177734375, + "step": 13477 + }, + { + "epoch": 0.9110450182506421, + "grad_norm": 1.0812299251556396, + "learning_rate": 6.250503661106233e-07, + "loss": 0.1603240966796875, + "step": 13478 + }, + { + "epoch": 0.9111126132215763, + "grad_norm": 0.4193282127380371, + "learning_rate": 6.241081584271192e-07, + "loss": 0.0471954345703125, + "step": 13479 + }, + { + "epoch": 0.9111802081925104, + "grad_norm": 0.8143748044967651, + "learning_rate": 6.231666463386648e-07, + "loss": 0.13459014892578125, + "step": 13480 + }, + { + "epoch": 0.9112478031634447, + "grad_norm": 0.7588253617286682, + "learning_rate": 6.22225829890819e-07, + "loss": 0.1335601806640625, + "step": 13481 + }, + { + "epoch": 0.9113153981343788, + "grad_norm": 0.18855297565460205, + "learning_rate": 6.212857091290985e-07, + "loss": 0.02280426025390625, + "step": 13482 + }, + { + "epoch": 0.911382993105313, + "grad_norm": 0.5088927745819092, + "learning_rate": 6.203462840989988e-07, + "loss": 0.0828704833984375, + "step": 13483 + }, + { + "epoch": 0.9114505880762471, + "grad_norm": 0.641592264175415, + "learning_rate": 6.194075548459683e-07, + "loss": 0.10003662109375, + "step": 13484 + }, + { + "epoch": 0.9115181830471812, + "grad_norm": 0.3509235680103302, + "learning_rate": 6.184695214154356e-07, + "loss": 0.06696319580078125, + "step": 13485 + }, + { + "epoch": 0.9115857780181155, + "grad_norm": 0.6248899698257446, + "learning_rate": 6.175321838527848e-07, + "loss": 0.08905792236328125, + "step": 13486 + }, + { + "epoch": 0.9116533729890496, + "grad_norm": 0.8427824974060059, + "learning_rate": 6.165955422033692e-07, + "loss": 0.18695068359375, + "step": 13487 + }, + { + "epoch": 0.9117209679599838, + "grad_norm": 1.0147122144699097, + "learning_rate": 6.156595965125111e-07, + "loss": 0.145599365234375, + "step": 13488 + }, + { + "epoch": 0.9117885629309179, + "grad_norm": 0.5721510648727417, + "learning_rate": 6.147243468254976e-07, + "loss": 0.07758712768554688, + "step": 13489 + }, + { + "epoch": 0.9118561579018521, + "grad_norm": 0.6743371486663818, + "learning_rate": 6.137897931875824e-07, + "loss": 0.12067794799804688, + "step": 13490 + }, + { + "epoch": 0.9119237528727863, + "grad_norm": 0.3034800589084625, + "learning_rate": 6.128559356439828e-07, + "loss": 0.05414581298828125, + "step": 13491 + }, + { + "epoch": 0.9119913478437204, + "grad_norm": 1.004888892173767, + "learning_rate": 6.119227742398842e-07, + "loss": 0.167633056640625, + "step": 13492 + }, + { + "epoch": 0.9120589428146546, + "grad_norm": 0.26356521248817444, + "learning_rate": 6.109903090204405e-07, + "loss": 0.03138160705566406, + "step": 13493 + }, + { + "epoch": 0.9121265377855887, + "grad_norm": 0.8552187085151672, + "learning_rate": 6.100585400307723e-07, + "loss": 0.1221160888671875, + "step": 13494 + }, + { + "epoch": 0.912194132756523, + "grad_norm": 0.441236674785614, + "learning_rate": 6.091274673159603e-07, + "loss": 0.06988906860351562, + "step": 13495 + }, + { + "epoch": 0.9122617277274571, + "grad_norm": 0.31245478987693787, + "learning_rate": 6.081970909210582e-07, + "loss": 0.038219451904296875, + "step": 13496 + }, + { + "epoch": 0.9123293226983913, + "grad_norm": 0.9176990389823914, + "learning_rate": 6.072674108910803e-07, + "loss": 0.13483428955078125, + "step": 13497 + }, + { + "epoch": 0.9123969176693254, + "grad_norm": 0.573716402053833, + "learning_rate": 6.063384272710154e-07, + "loss": 0.06852340698242188, + "step": 13498 + }, + { + "epoch": 0.9124645126402595, + "grad_norm": 0.9162142872810364, + "learning_rate": 6.054101401058109e-07, + "loss": 0.16162109375, + "step": 13499 + }, + { + "epoch": 0.9125321076111937, + "grad_norm": 0.26116469502449036, + "learning_rate": 6.044825494403794e-07, + "loss": 0.04750823974609375, + "step": 13500 + }, + { + "epoch": 0.9125997025821279, + "grad_norm": 0.6460662484169006, + "learning_rate": 6.035556553196098e-07, + "loss": 0.08935737609863281, + "step": 13501 + }, + { + "epoch": 0.9126672975530621, + "grad_norm": 1.9689894914627075, + "learning_rate": 6.026294577883462e-07, + "loss": 0.1594696044921875, + "step": 13502 + }, + { + "epoch": 0.9127348925239962, + "grad_norm": 1.0207507610321045, + "learning_rate": 6.017039568914079e-07, + "loss": 0.1634979248046875, + "step": 13503 + }, + { + "epoch": 0.9128024874949304, + "grad_norm": 0.2231149971485138, + "learning_rate": 6.007791526735724e-07, + "loss": 0.03368377685546875, + "step": 13504 + }, + { + "epoch": 0.9128700824658645, + "grad_norm": 0.33949682116508484, + "learning_rate": 5.998550451795903e-07, + "loss": 0.04642295837402344, + "step": 13505 + }, + { + "epoch": 0.9129376774367987, + "grad_norm": 0.9006205201148987, + "learning_rate": 5.989316344541729e-07, + "loss": 0.13768959045410156, + "step": 13506 + }, + { + "epoch": 0.9130052724077329, + "grad_norm": 0.6851963996887207, + "learning_rate": 5.980089205420059e-07, + "loss": 0.1608734130859375, + "step": 13507 + }, + { + "epoch": 0.913072867378667, + "grad_norm": 1.2942208051681519, + "learning_rate": 5.970869034877286e-07, + "loss": 0.14176559448242188, + "step": 13508 + }, + { + "epoch": 0.9131404623496012, + "grad_norm": 0.3028548061847687, + "learning_rate": 5.961655833359603e-07, + "loss": 0.05029296875, + "step": 13509 + }, + { + "epoch": 0.9132080573205353, + "grad_norm": 1.0200915336608887, + "learning_rate": 5.95244960131277e-07, + "loss": 0.1542816162109375, + "step": 13510 + }, + { + "epoch": 0.9132756522914696, + "grad_norm": 0.20915336906909943, + "learning_rate": 5.943250339182216e-07, + "loss": 0.023862838745117188, + "step": 13511 + }, + { + "epoch": 0.9133432472624037, + "grad_norm": 1.117354154586792, + "learning_rate": 5.934058047413133e-07, + "loss": 0.1797332763671875, + "step": 13512 + }, + { + "epoch": 0.9134108422333378, + "grad_norm": 0.5572925806045532, + "learning_rate": 5.924872726450231e-07, + "loss": 0.11760711669921875, + "step": 13513 + }, + { + "epoch": 0.913478437204272, + "grad_norm": 1.375291109085083, + "learning_rate": 5.91569437673799e-07, + "loss": 0.189056396484375, + "step": 13514 + }, + { + "epoch": 0.9135460321752061, + "grad_norm": 0.26221001148223877, + "learning_rate": 5.906522998720504e-07, + "loss": 0.03576850891113281, + "step": 13515 + }, + { + "epoch": 0.9136136271461404, + "grad_norm": 0.3856334388256073, + "learning_rate": 5.897358592841551e-07, + "loss": 0.06314468383789062, + "step": 13516 + }, + { + "epoch": 0.9136812221170745, + "grad_norm": 1.3534797430038452, + "learning_rate": 5.888201159544543e-07, + "loss": 0.1635894775390625, + "step": 13517 + }, + { + "epoch": 0.9137488170880087, + "grad_norm": 1.309210181236267, + "learning_rate": 5.879050699272592e-07, + "loss": 0.1533660888671875, + "step": 13518 + }, + { + "epoch": 0.9138164120589428, + "grad_norm": 0.9456408619880676, + "learning_rate": 5.869907212468428e-07, + "loss": 0.1443023681640625, + "step": 13519 + }, + { + "epoch": 0.9138840070298769, + "grad_norm": 0.3805961012840271, + "learning_rate": 5.86077069957453e-07, + "loss": 0.05496978759765625, + "step": 13520 + }, + { + "epoch": 0.9139516020008112, + "grad_norm": 0.8969595432281494, + "learning_rate": 5.851641161032895e-07, + "loss": 0.1286468505859375, + "step": 13521 + }, + { + "epoch": 0.9140191969717453, + "grad_norm": 0.5658682584762573, + "learning_rate": 5.842518597285318e-07, + "loss": 0.09039306640625, + "step": 13522 + }, + { + "epoch": 0.9140867919426795, + "grad_norm": 0.6572962403297424, + "learning_rate": 5.83340300877323e-07, + "loss": 0.119384765625, + "step": 13523 + }, + { + "epoch": 0.9141543869136136, + "grad_norm": 0.4713250696659088, + "learning_rate": 5.82429439593763e-07, + "loss": 0.086395263671875, + "step": 13524 + }, + { + "epoch": 0.9142219818845478, + "grad_norm": 0.31778860092163086, + "learning_rate": 5.815192759219312e-07, + "loss": 0.07318878173828125, + "step": 13525 + }, + { + "epoch": 0.914289576855482, + "grad_norm": 0.691216230392456, + "learning_rate": 5.80609809905861e-07, + "loss": 0.1326141357421875, + "step": 13526 + }, + { + "epoch": 0.9143571718264161, + "grad_norm": 1.060958743095398, + "learning_rate": 5.797010415895637e-07, + "loss": 0.191253662109375, + "step": 13527 + }, + { + "epoch": 0.9144247667973503, + "grad_norm": 0.8626662492752075, + "learning_rate": 5.787929710170093e-07, + "loss": 0.12065505981445312, + "step": 13528 + }, + { + "epoch": 0.9144923617682844, + "grad_norm": 0.3813285827636719, + "learning_rate": 5.778855982321357e-07, + "loss": 0.06079292297363281, + "step": 13529 + }, + { + "epoch": 0.9145599567392186, + "grad_norm": 0.25759416818618774, + "learning_rate": 5.769789232788463e-07, + "loss": 0.035106658935546875, + "step": 13530 + }, + { + "epoch": 0.9146275517101528, + "grad_norm": 0.9502987265586853, + "learning_rate": 5.760729462010112e-07, + "loss": 0.169036865234375, + "step": 13531 + }, + { + "epoch": 0.914695146681087, + "grad_norm": 1.0165084600448608, + "learning_rate": 5.751676670424683e-07, + "loss": 0.16986083984375, + "step": 13532 + }, + { + "epoch": 0.9147627416520211, + "grad_norm": 0.3827815353870392, + "learning_rate": 5.742630858470243e-07, + "loss": 0.06282806396484375, + "step": 13533 + }, + { + "epoch": 0.9148303366229552, + "grad_norm": 0.7828120589256287, + "learning_rate": 5.733592026584428e-07, + "loss": 0.13140869140625, + "step": 13534 + }, + { + "epoch": 0.9148979315938894, + "grad_norm": 0.4721307158470154, + "learning_rate": 5.724560175204585e-07, + "loss": 0.0616302490234375, + "step": 13535 + }, + { + "epoch": 0.9149655265648236, + "grad_norm": 1.0713104009628296, + "learning_rate": 5.715535304767816e-07, + "loss": 0.14404296875, + "step": 13536 + }, + { + "epoch": 0.9150331215357578, + "grad_norm": 0.7904289364814758, + "learning_rate": 5.706517415710688e-07, + "loss": 0.11639404296875, + "step": 13537 + }, + { + "epoch": 0.9151007165066919, + "grad_norm": 0.9443607330322266, + "learning_rate": 5.697506508469652e-07, + "loss": 0.1634063720703125, + "step": 13538 + }, + { + "epoch": 0.9151683114776261, + "grad_norm": 0.5105152130126953, + "learning_rate": 5.688502583480609e-07, + "loss": 0.08350753784179688, + "step": 13539 + }, + { + "epoch": 0.9152359064485602, + "grad_norm": 0.34435465931892395, + "learning_rate": 5.679505641179312e-07, + "loss": 0.048992156982421875, + "step": 13540 + }, + { + "epoch": 0.9153035014194943, + "grad_norm": 0.7908485531806946, + "learning_rate": 5.670515682001026e-07, + "loss": 0.137664794921875, + "step": 13541 + }, + { + "epoch": 0.9153710963904286, + "grad_norm": 0.75177001953125, + "learning_rate": 5.66153270638079e-07, + "loss": 0.133209228515625, + "step": 13542 + }, + { + "epoch": 0.9154386913613627, + "grad_norm": 1.0113977193832397, + "learning_rate": 5.652556714753221e-07, + "loss": 0.13568115234375, + "step": 13543 + }, + { + "epoch": 0.9155062863322969, + "grad_norm": 0.24671345949172974, + "learning_rate": 5.643587707552639e-07, + "loss": 0.04679107666015625, + "step": 13544 + }, + { + "epoch": 0.915573881303231, + "grad_norm": 0.6496099233627319, + "learning_rate": 5.634625685213047e-07, + "loss": 0.1055450439453125, + "step": 13545 + }, + { + "epoch": 0.9156414762741653, + "grad_norm": 1.0707203149795532, + "learning_rate": 5.625670648168046e-07, + "loss": 0.16130447387695312, + "step": 13546 + }, + { + "epoch": 0.9157090712450994, + "grad_norm": 0.45743340253829956, + "learning_rate": 5.616722596850959e-07, + "loss": 0.09192657470703125, + "step": 13547 + }, + { + "epoch": 0.9157766662160335, + "grad_norm": 0.3613511323928833, + "learning_rate": 5.607781531694723e-07, + "loss": 0.06447601318359375, + "step": 13548 + }, + { + "epoch": 0.9158442611869677, + "grad_norm": 0.7000645399093628, + "learning_rate": 5.598847453132023e-07, + "loss": 0.1034088134765625, + "step": 13549 + }, + { + "epoch": 0.9159118561579018, + "grad_norm": 0.5537354946136475, + "learning_rate": 5.589920361595063e-07, + "loss": 0.08135986328125, + "step": 13550 + }, + { + "epoch": 0.915979451128836, + "grad_norm": 0.5429935455322266, + "learning_rate": 5.58100025751585e-07, + "loss": 0.08803176879882812, + "step": 13551 + }, + { + "epoch": 0.9160470460997702, + "grad_norm": 0.3659822642803192, + "learning_rate": 5.572087141325971e-07, + "loss": 0.06264114379882812, + "step": 13552 + }, + { + "epoch": 0.9161146410707044, + "grad_norm": 0.32346072793006897, + "learning_rate": 5.563181013456714e-07, + "loss": 0.04090118408203125, + "step": 13553 + }, + { + "epoch": 0.9161822360416385, + "grad_norm": 0.5060377717018127, + "learning_rate": 5.554281874339001e-07, + "loss": 0.090087890625, + "step": 13554 + }, + { + "epoch": 0.9162498310125726, + "grad_norm": 0.4068121910095215, + "learning_rate": 5.545389724403421e-07, + "loss": 0.0414886474609375, + "step": 13555 + }, + { + "epoch": 0.9163174259835069, + "grad_norm": 0.29738062620162964, + "learning_rate": 5.536504564080247e-07, + "loss": 0.05200958251953125, + "step": 13556 + }, + { + "epoch": 0.916385020954441, + "grad_norm": 0.7398475408554077, + "learning_rate": 5.5276263937994e-07, + "loss": 0.1428070068359375, + "step": 13557 + }, + { + "epoch": 0.9164526159253752, + "grad_norm": 0.2984209358692169, + "learning_rate": 5.51875521399044e-07, + "loss": 0.06371307373046875, + "step": 13558 + }, + { + "epoch": 0.9165202108963093, + "grad_norm": 0.5920302867889404, + "learning_rate": 5.50989102508262e-07, + "loss": 0.100860595703125, + "step": 13559 + }, + { + "epoch": 0.9165878058672435, + "grad_norm": 0.3533002734184265, + "learning_rate": 5.501033827504848e-07, + "loss": 0.05344390869140625, + "step": 13560 + }, + { + "epoch": 0.9166554008381776, + "grad_norm": 1.152632236480713, + "learning_rate": 5.492183621685681e-07, + "loss": 0.12511062622070312, + "step": 13561 + }, + { + "epoch": 0.9167229958091118, + "grad_norm": 0.22634610533714294, + "learning_rate": 5.483340408053378e-07, + "loss": 0.031352996826171875, + "step": 13562 + }, + { + "epoch": 0.916790590780046, + "grad_norm": 0.3137570321559906, + "learning_rate": 5.474504187035778e-07, + "loss": 0.060169219970703125, + "step": 13563 + }, + { + "epoch": 0.9168581857509801, + "grad_norm": 0.41822242736816406, + "learning_rate": 5.465674959060473e-07, + "loss": 0.06854248046875, + "step": 13564 + }, + { + "epoch": 0.9169257807219143, + "grad_norm": 0.5086105465888977, + "learning_rate": 5.456852724554673e-07, + "loss": 0.0955963134765625, + "step": 13565 + }, + { + "epoch": 0.9169933756928484, + "grad_norm": 0.30431419610977173, + "learning_rate": 5.448037483945234e-07, + "loss": 0.045413970947265625, + "step": 13566 + }, + { + "epoch": 0.9170609706637826, + "grad_norm": 0.2960570752620697, + "learning_rate": 5.4392292376587e-07, + "loss": 0.05242919921875, + "step": 13567 + }, + { + "epoch": 0.9171285656347168, + "grad_norm": 0.5238589644432068, + "learning_rate": 5.430427986121261e-07, + "loss": 0.06935691833496094, + "step": 13568 + }, + { + "epoch": 0.9171961606056509, + "grad_norm": 0.776096761226654, + "learning_rate": 5.421633729758796e-07, + "loss": 0.1249847412109375, + "step": 13569 + }, + { + "epoch": 0.9172637555765851, + "grad_norm": 2.475243091583252, + "learning_rate": 5.412846468996812e-07, + "loss": 0.1852569580078125, + "step": 13570 + }, + { + "epoch": 0.9173313505475192, + "grad_norm": 0.7837713956832886, + "learning_rate": 5.404066204260505e-07, + "loss": 0.09009552001953125, + "step": 13571 + }, + { + "epoch": 0.9173989455184535, + "grad_norm": 0.32358551025390625, + "learning_rate": 5.395292935974683e-07, + "loss": 0.03778076171875, + "step": 13572 + }, + { + "epoch": 0.9174665404893876, + "grad_norm": 0.41456297039985657, + "learning_rate": 5.38652666456389e-07, + "loss": 0.071807861328125, + "step": 13573 + }, + { + "epoch": 0.9175341354603217, + "grad_norm": 0.6224520206451416, + "learning_rate": 5.377767390452271e-07, + "loss": 0.100372314453125, + "step": 13574 + }, + { + "epoch": 0.9176017304312559, + "grad_norm": 0.5807545185089111, + "learning_rate": 5.369015114063686e-07, + "loss": 0.1044464111328125, + "step": 13575 + }, + { + "epoch": 0.91766932540219, + "grad_norm": 0.21599659323692322, + "learning_rate": 5.360269835821563e-07, + "loss": 0.027853012084960938, + "step": 13576 + }, + { + "epoch": 0.9177369203731243, + "grad_norm": 0.18660035729408264, + "learning_rate": 5.351531556149097e-07, + "loss": 0.032367706298828125, + "step": 13577 + }, + { + "epoch": 0.9178045153440584, + "grad_norm": 0.22279268503189087, + "learning_rate": 5.342800275469118e-07, + "loss": 0.02870464324951172, + "step": 13578 + }, + { + "epoch": 0.9178721103149926, + "grad_norm": 0.7263851165771484, + "learning_rate": 5.334075994204052e-07, + "loss": 0.10423660278320312, + "step": 13579 + }, + { + "epoch": 0.9179397052859267, + "grad_norm": 0.27469319105148315, + "learning_rate": 5.325358712776079e-07, + "loss": 0.05254364013671875, + "step": 13580 + }, + { + "epoch": 0.9180073002568608, + "grad_norm": 0.2704565227031708, + "learning_rate": 5.316648431606963e-07, + "loss": 0.05014801025390625, + "step": 13581 + }, + { + "epoch": 0.9180748952277951, + "grad_norm": 0.7879111766815186, + "learning_rate": 5.307945151118166e-07, + "loss": 0.1197052001953125, + "step": 13582 + }, + { + "epoch": 0.9181424901987292, + "grad_norm": 0.5000049471855164, + "learning_rate": 5.299248871730816e-07, + "loss": 0.06750869750976562, + "step": 13583 + }, + { + "epoch": 0.9182100851696634, + "grad_norm": 0.3491019904613495, + "learning_rate": 5.290559593865696e-07, + "loss": 0.0565948486328125, + "step": 13584 + }, + { + "epoch": 0.9182776801405975, + "grad_norm": 0.35085487365722656, + "learning_rate": 5.281877317943234e-07, + "loss": 0.06268310546875, + "step": 13585 + }, + { + "epoch": 0.9183452751115317, + "grad_norm": 0.7210081815719604, + "learning_rate": 5.273202044383546e-07, + "loss": 0.1214141845703125, + "step": 13586 + }, + { + "epoch": 0.9184128700824659, + "grad_norm": 0.23568634688854218, + "learning_rate": 5.264533773606362e-07, + "loss": 0.036464691162109375, + "step": 13587 + }, + { + "epoch": 0.9184804650534, + "grad_norm": 1.1437805891036987, + "learning_rate": 5.255872506031179e-07, + "loss": 0.192718505859375, + "step": 13588 + }, + { + "epoch": 0.9185480600243342, + "grad_norm": 0.7675701975822449, + "learning_rate": 5.247218242077012e-07, + "loss": 0.14678955078125, + "step": 13589 + }, + { + "epoch": 0.9186156549952683, + "grad_norm": 1.4623252153396606, + "learning_rate": 5.238570982162627e-07, + "loss": 0.16181564331054688, + "step": 13590 + }, + { + "epoch": 0.9186832499662025, + "grad_norm": 0.1841786950826645, + "learning_rate": 5.229930726706455e-07, + "loss": 0.0210418701171875, + "step": 13591 + }, + { + "epoch": 0.9187508449371367, + "grad_norm": 1.1310869455337524, + "learning_rate": 5.221297476126547e-07, + "loss": 0.20501708984375, + "step": 13592 + }, + { + "epoch": 0.9188184399080709, + "grad_norm": 1.6796023845672607, + "learning_rate": 5.212671230840649e-07, + "loss": 0.13383865356445312, + "step": 13593 + }, + { + "epoch": 0.918886034879005, + "grad_norm": 0.47477489709854126, + "learning_rate": 5.204051991266129e-07, + "loss": 0.085723876953125, + "step": 13594 + }, + { + "epoch": 0.9189536298499391, + "grad_norm": 0.38207727670669556, + "learning_rate": 5.195439757820036e-07, + "loss": 0.0726470947265625, + "step": 13595 + }, + { + "epoch": 0.9190212248208733, + "grad_norm": 1.2799925804138184, + "learning_rate": 5.186834530919121e-07, + "loss": 0.22015380859375, + "step": 13596 + }, + { + "epoch": 0.9190888197918075, + "grad_norm": 1.095625400543213, + "learning_rate": 5.178236310979718e-07, + "loss": 0.201873779296875, + "step": 13597 + }, + { + "epoch": 0.9191564147627417, + "grad_norm": 0.6344636082649231, + "learning_rate": 5.169645098417875e-07, + "loss": 0.08727169036865234, + "step": 13598 + }, + { + "epoch": 0.9192240097336758, + "grad_norm": 0.46266645193099976, + "learning_rate": 5.161060893649311e-07, + "loss": 0.08416748046875, + "step": 13599 + }, + { + "epoch": 0.91929160470461, + "grad_norm": 0.4222424626350403, + "learning_rate": 5.152483697089344e-07, + "loss": 0.04864501953125, + "step": 13600 + }, + { + "epoch": 0.9193591996755441, + "grad_norm": 0.3166441023349762, + "learning_rate": 5.143913509153042e-07, + "loss": 0.042510986328125, + "step": 13601 + }, + { + "epoch": 0.9194267946464783, + "grad_norm": 0.7062770128250122, + "learning_rate": 5.135350330255023e-07, + "loss": 0.136688232421875, + "step": 13602 + }, + { + "epoch": 0.9194943896174125, + "grad_norm": 0.44021302461624146, + "learning_rate": 5.126794160809672e-07, + "loss": 0.0983123779296875, + "step": 13603 + }, + { + "epoch": 0.9195619845883466, + "grad_norm": 0.49268898367881775, + "learning_rate": 5.118245001230993e-07, + "loss": 0.0869293212890625, + "step": 13604 + }, + { + "epoch": 0.9196295795592808, + "grad_norm": 0.30313360691070557, + "learning_rate": 5.109702851932602e-07, + "loss": 0.049999237060546875, + "step": 13605 + }, + { + "epoch": 0.9196971745302149, + "grad_norm": 0.49855583906173706, + "learning_rate": 5.101167713327853e-07, + "loss": 0.03910064697265625, + "step": 13606 + }, + { + "epoch": 0.9197647695011492, + "grad_norm": 1.790947675704956, + "learning_rate": 5.092639585829733e-07, + "loss": 0.2362823486328125, + "step": 13607 + }, + { + "epoch": 0.9198323644720833, + "grad_norm": 0.3928556442260742, + "learning_rate": 5.084118469850895e-07, + "loss": 0.0667266845703125, + "step": 13608 + }, + { + "epoch": 0.9198999594430174, + "grad_norm": 0.6321899890899658, + "learning_rate": 5.07560436580361e-07, + "loss": 0.08367156982421875, + "step": 13609 + }, + { + "epoch": 0.9199675544139516, + "grad_norm": 0.8137621879577637, + "learning_rate": 5.067097274099864e-07, + "loss": 0.1528472900390625, + "step": 13610 + }, + { + "epoch": 0.9200351493848857, + "grad_norm": 0.39247530698776245, + "learning_rate": 5.058597195151276e-07, + "loss": 0.0771484375, + "step": 13611 + }, + { + "epoch": 0.92010274435582, + "grad_norm": 1.3413443565368652, + "learning_rate": 5.050104129369154e-07, + "loss": 0.1786041259765625, + "step": 13612 + }, + { + "epoch": 0.9201703393267541, + "grad_norm": 0.9827868342399597, + "learning_rate": 5.041618077164401e-07, + "loss": 0.10573577880859375, + "step": 13613 + }, + { + "epoch": 0.9202379342976883, + "grad_norm": 0.8041199445724487, + "learning_rate": 5.03313903894767e-07, + "loss": 0.120025634765625, + "step": 13614 + }, + { + "epoch": 0.9203055292686224, + "grad_norm": 0.96326744556427, + "learning_rate": 5.024667015129219e-07, + "loss": 0.13622283935546875, + "step": 13615 + }, + { + "epoch": 0.9203731242395565, + "grad_norm": 0.3010994493961334, + "learning_rate": 5.016202006118953e-07, + "loss": 0.054729461669921875, + "step": 13616 + }, + { + "epoch": 0.9204407192104908, + "grad_norm": 0.5254852175712585, + "learning_rate": 5.007744012326493e-07, + "loss": 0.07799530029296875, + "step": 13617 + }, + { + "epoch": 0.9205083141814249, + "grad_norm": 0.5036711692810059, + "learning_rate": 4.999293034161046e-07, + "loss": 0.09210205078125, + "step": 13618 + }, + { + "epoch": 0.9205759091523591, + "grad_norm": 0.8841966390609741, + "learning_rate": 4.990849072031583e-07, + "loss": 0.1438751220703125, + "step": 13619 + }, + { + "epoch": 0.9206435041232932, + "grad_norm": 0.6840681433677673, + "learning_rate": 4.982412126346631e-07, + "loss": 0.119964599609375, + "step": 13620 + }, + { + "epoch": 0.9207110990942274, + "grad_norm": 0.412309467792511, + "learning_rate": 4.973982197514443e-07, + "loss": 0.05683326721191406, + "step": 13621 + }, + { + "epoch": 0.9207786940651616, + "grad_norm": 0.9665268063545227, + "learning_rate": 4.965559285942895e-07, + "loss": 0.170257568359375, + "step": 13622 + }, + { + "epoch": 0.9208462890360957, + "grad_norm": 0.6042956709861755, + "learning_rate": 4.957143392039543e-07, + "loss": 0.083282470703125, + "step": 13623 + }, + { + "epoch": 0.9209138840070299, + "grad_norm": 0.3231401741504669, + "learning_rate": 4.948734516211612e-07, + "loss": 0.056056976318359375, + "step": 13624 + }, + { + "epoch": 0.920981478977964, + "grad_norm": 0.287740021944046, + "learning_rate": 4.940332658865976e-07, + "loss": 0.044574737548828125, + "step": 13625 + }, + { + "epoch": 0.9210490739488982, + "grad_norm": 0.8665506839752197, + "learning_rate": 4.931937820409143e-07, + "loss": 0.154052734375, + "step": 13626 + }, + { + "epoch": 0.9211166689198323, + "grad_norm": 0.668350100517273, + "learning_rate": 4.92355000124734e-07, + "loss": 0.10336494445800781, + "step": 13627 + }, + { + "epoch": 0.9211842638907666, + "grad_norm": 0.6128832697868347, + "learning_rate": 4.915169201786441e-07, + "loss": 0.1050872802734375, + "step": 13628 + }, + { + "epoch": 0.9212518588617007, + "grad_norm": 0.562322735786438, + "learning_rate": 4.90679542243187e-07, + "loss": 0.0874481201171875, + "step": 13629 + }, + { + "epoch": 0.9213194538326348, + "grad_norm": 0.2519761323928833, + "learning_rate": 4.898428663588905e-07, + "loss": 0.03492546081542969, + "step": 13630 + }, + { + "epoch": 0.921387048803569, + "grad_norm": 0.7562610507011414, + "learning_rate": 4.890068925662305e-07, + "loss": 0.1229705810546875, + "step": 13631 + }, + { + "epoch": 0.9214546437745031, + "grad_norm": 0.3536648452281952, + "learning_rate": 4.88171620905663e-07, + "loss": 0.04242706298828125, + "step": 13632 + }, + { + "epoch": 0.9215222387454374, + "grad_norm": 1.8423975706100464, + "learning_rate": 4.87337051417599e-07, + "loss": 0.184051513671875, + "step": 13633 + }, + { + "epoch": 0.9215898337163715, + "grad_norm": 0.6145304441452026, + "learning_rate": 4.865031841424211e-07, + "loss": 0.1259918212890625, + "step": 13634 + }, + { + "epoch": 0.9216574286873057, + "grad_norm": 0.7184972167015076, + "learning_rate": 4.856700191204788e-07, + "loss": 0.08404350280761719, + "step": 13635 + }, + { + "epoch": 0.9217250236582398, + "grad_norm": 0.9668490290641785, + "learning_rate": 4.848375563920848e-07, + "loss": 0.1785736083984375, + "step": 13636 + }, + { + "epoch": 0.9217926186291739, + "grad_norm": 0.2191901057958603, + "learning_rate": 4.840057959975169e-07, + "loss": 0.03289031982421875, + "step": 13637 + }, + { + "epoch": 0.9218602136001082, + "grad_norm": 0.7283400297164917, + "learning_rate": 4.831747379770246e-07, + "loss": 0.14093017578125, + "step": 13638 + }, + { + "epoch": 0.9219278085710423, + "grad_norm": 0.22178247570991516, + "learning_rate": 4.823443823708173e-07, + "loss": 0.024030685424804688, + "step": 13639 + }, + { + "epoch": 0.9219954035419765, + "grad_norm": 0.7920030951499939, + "learning_rate": 4.815147292190713e-07, + "loss": 0.18865966796875, + "step": 13640 + }, + { + "epoch": 0.9220629985129106, + "grad_norm": 1.1345208883285522, + "learning_rate": 4.806857785619361e-07, + "loss": 0.211639404296875, + "step": 13641 + }, + { + "epoch": 0.9221305934838449, + "grad_norm": 0.2862480878829956, + "learning_rate": 4.798575304395131e-07, + "loss": 0.05377197265625, + "step": 13642 + }, + { + "epoch": 0.922198188454779, + "grad_norm": 0.3820965886116028, + "learning_rate": 4.790299848918849e-07, + "loss": 0.07538604736328125, + "step": 13643 + }, + { + "epoch": 0.9222657834257131, + "grad_norm": 0.8704479336738586, + "learning_rate": 4.782031419590899e-07, + "loss": 0.155517578125, + "step": 13644 + }, + { + "epoch": 0.9223333783966473, + "grad_norm": 0.4593222737312317, + "learning_rate": 4.773770016811391e-07, + "loss": 0.07220458984375, + "step": 13645 + }, + { + "epoch": 0.9224009733675814, + "grad_norm": 0.23254582285881042, + "learning_rate": 4.7655156409800227e-07, + "loss": 0.04013824462890625, + "step": 13646 + }, + { + "epoch": 0.9224685683385156, + "grad_norm": 0.4609673023223877, + "learning_rate": 4.7572682924962085e-07, + "loss": 0.07907390594482422, + "step": 13647 + }, + { + "epoch": 0.9225361633094498, + "grad_norm": 0.267018586397171, + "learning_rate": 4.749027971759029e-07, + "loss": 0.0439910888671875, + "step": 13648 + }, + { + "epoch": 0.922603758280384, + "grad_norm": 0.9663441181182861, + "learning_rate": 4.7407946791671654e-07, + "loss": 0.154876708984375, + "step": 13649 + }, + { + "epoch": 0.9226713532513181, + "grad_norm": 0.8319170475006104, + "learning_rate": 4.732568415119015e-07, + "loss": 0.110931396484375, + "step": 13650 + }, + { + "epoch": 0.9227389482222522, + "grad_norm": 0.9129304885864258, + "learning_rate": 4.724349180012627e-07, + "loss": 0.1368255615234375, + "step": 13651 + }, + { + "epoch": 0.9228065431931864, + "grad_norm": 0.45655855536460876, + "learning_rate": 4.716136974245666e-07, + "loss": 0.06781387329101562, + "step": 13652 + }, + { + "epoch": 0.9228741381641206, + "grad_norm": 1.4268999099731445, + "learning_rate": 4.707931798215498e-07, + "loss": 0.204132080078125, + "step": 13653 + }, + { + "epoch": 0.9229417331350548, + "grad_norm": 0.38382434844970703, + "learning_rate": 4.699733652319188e-07, + "loss": 0.04393768310546875, + "step": 13654 + }, + { + "epoch": 0.9230093281059889, + "grad_norm": 1.308078408241272, + "learning_rate": 4.691542536953353e-07, + "loss": 0.200439453125, + "step": 13655 + }, + { + "epoch": 0.9230769230769231, + "grad_norm": 0.2976817488670349, + "learning_rate": 4.683358452514358e-07, + "loss": 0.0578460693359375, + "step": 13656 + }, + { + "epoch": 0.9231445180478572, + "grad_norm": 0.4098394811153412, + "learning_rate": 4.675181399398204e-07, + "loss": 0.06146240234375, + "step": 13657 + }, + { + "epoch": 0.9232121130187914, + "grad_norm": 0.2806520462036133, + "learning_rate": 4.6670113780005244e-07, + "loss": 0.056640625, + "step": 13658 + }, + { + "epoch": 0.9232797079897256, + "grad_norm": 0.5025913119316101, + "learning_rate": 4.658848388716652e-07, + "loss": 0.090118408203125, + "step": 13659 + }, + { + "epoch": 0.9233473029606597, + "grad_norm": 0.32737037539482117, + "learning_rate": 4.6506924319415724e-07, + "loss": 0.05294036865234375, + "step": 13660 + }, + { + "epoch": 0.9234148979315939, + "grad_norm": 0.2629452645778656, + "learning_rate": 4.642543508069902e-07, + "loss": 0.058135986328125, + "step": 13661 + }, + { + "epoch": 0.923482492902528, + "grad_norm": 1.0265579223632812, + "learning_rate": 4.634401617495959e-07, + "loss": 0.1452789306640625, + "step": 13662 + }, + { + "epoch": 0.9235500878734623, + "grad_norm": 0.49482792615890503, + "learning_rate": 4.626266760613679e-07, + "loss": 0.061920166015625, + "step": 13663 + }, + { + "epoch": 0.9236176828443964, + "grad_norm": 0.42396965622901917, + "learning_rate": 4.6181389378166796e-07, + "loss": 0.07947540283203125, + "step": 13664 + }, + { + "epoch": 0.9236852778153305, + "grad_norm": 0.6227146983146667, + "learning_rate": 4.6100181494982296e-07, + "loss": 0.09498405456542969, + "step": 13665 + }, + { + "epoch": 0.9237528727862647, + "grad_norm": 1.7069098949432373, + "learning_rate": 4.6019043960512653e-07, + "loss": 0.259002685546875, + "step": 13666 + }, + { + "epoch": 0.9238204677571988, + "grad_norm": 0.3144267797470093, + "learning_rate": 4.5937976778684223e-07, + "loss": 0.037944793701171875, + "step": 13667 + }, + { + "epoch": 0.9238880627281331, + "grad_norm": 0.591693639755249, + "learning_rate": 4.5856979953418866e-07, + "loss": 0.104400634765625, + "step": 13668 + }, + { + "epoch": 0.9239556576990672, + "grad_norm": 0.3210689127445221, + "learning_rate": 4.577605348863612e-07, + "loss": 0.0625762939453125, + "step": 13669 + }, + { + "epoch": 0.9240232526700014, + "grad_norm": 1.0975699424743652, + "learning_rate": 4.5695197388251673e-07, + "loss": 0.1795654296875, + "step": 13670 + }, + { + "epoch": 0.9240908476409355, + "grad_norm": 0.32815974950790405, + "learning_rate": 4.5614411656177746e-07, + "loss": 0.036688804626464844, + "step": 13671 + }, + { + "epoch": 0.9241584426118696, + "grad_norm": 0.44723889231681824, + "learning_rate": 4.553369629632337e-07, + "loss": 0.06639480590820312, + "step": 13672 + }, + { + "epoch": 0.9242260375828039, + "grad_norm": 0.4222634434700012, + "learning_rate": 4.5453051312594087e-07, + "loss": 0.07198715209960938, + "step": 13673 + }, + { + "epoch": 0.924293632553738, + "grad_norm": 0.5379535555839539, + "learning_rate": 4.5372476708891776e-07, + "loss": 0.07884597778320312, + "step": 13674 + }, + { + "epoch": 0.9243612275246722, + "grad_norm": 0.3729986548423767, + "learning_rate": 4.529197248911532e-07, + "loss": 0.08199310302734375, + "step": 13675 + }, + { + "epoch": 0.9244288224956063, + "grad_norm": 0.8869614601135254, + "learning_rate": 4.521153865715993e-07, + "loss": 0.09732246398925781, + "step": 13676 + }, + { + "epoch": 0.9244964174665405, + "grad_norm": 0.5325038433074951, + "learning_rate": 4.5131175216917507e-07, + "loss": 0.09310531616210938, + "step": 13677 + }, + { + "epoch": 0.9245640124374747, + "grad_norm": 0.558497965335846, + "learning_rate": 4.5050882172276764e-07, + "loss": 0.06998634338378906, + "step": 13678 + }, + { + "epoch": 0.9246316074084088, + "grad_norm": 0.5832891464233398, + "learning_rate": 4.497065952712226e-07, + "loss": 0.07881927490234375, + "step": 13679 + }, + { + "epoch": 0.924699202379343, + "grad_norm": 0.8523566722869873, + "learning_rate": 4.489050728533622e-07, + "loss": 0.1461944580078125, + "step": 13680 + }, + { + "epoch": 0.9247667973502771, + "grad_norm": 0.666084885597229, + "learning_rate": 4.481042545079639e-07, + "loss": 0.0958251953125, + "step": 13681 + }, + { + "epoch": 0.9248343923212113, + "grad_norm": 0.4454844295978546, + "learning_rate": 4.473041402737815e-07, + "loss": 0.07139968872070312, + "step": 13682 + }, + { + "epoch": 0.9249019872921455, + "grad_norm": 0.5680017471313477, + "learning_rate": 4.465047301895259e-07, + "loss": 0.07523345947265625, + "step": 13683 + }, + { + "epoch": 0.9249695822630797, + "grad_norm": 0.5291257500648499, + "learning_rate": 4.457060242938793e-07, + "loss": 0.09416961669921875, + "step": 13684 + }, + { + "epoch": 0.9250371772340138, + "grad_norm": 0.7221667170524597, + "learning_rate": 4.4490802262548757e-07, + "loss": 0.1085357666015625, + "step": 13685 + }, + { + "epoch": 0.9251047722049479, + "grad_norm": 0.7372364401817322, + "learning_rate": 4.4411072522296317e-07, + "loss": 0.09183502197265625, + "step": 13686 + }, + { + "epoch": 0.9251723671758821, + "grad_norm": 0.8090590238571167, + "learning_rate": 4.433141321248835e-07, + "loss": 0.13558197021484375, + "step": 13687 + }, + { + "epoch": 0.9252399621468163, + "grad_norm": 0.40717440843582153, + "learning_rate": 4.425182433697927e-07, + "loss": 0.06525421142578125, + "step": 13688 + }, + { + "epoch": 0.9253075571177505, + "grad_norm": 0.512297511100769, + "learning_rate": 4.417230589962018e-07, + "loss": 0.0690155029296875, + "step": 13689 + }, + { + "epoch": 0.9253751520886846, + "grad_norm": 1.039636254310608, + "learning_rate": 4.409285790425849e-07, + "loss": 0.1300029754638672, + "step": 13690 + }, + { + "epoch": 0.9254427470596187, + "grad_norm": 0.19659464061260223, + "learning_rate": 4.401348035473879e-07, + "loss": 0.031177520751953125, + "step": 13691 + }, + { + "epoch": 0.9255103420305529, + "grad_norm": 0.8165456056594849, + "learning_rate": 4.3934173254901346e-07, + "loss": 0.125946044921875, + "step": 13692 + }, + { + "epoch": 0.925577937001487, + "grad_norm": 0.47174498438835144, + "learning_rate": 4.3854936608584086e-07, + "loss": 0.1029052734375, + "step": 13693 + }, + { + "epoch": 0.9256455319724213, + "grad_norm": 1.1121786832809448, + "learning_rate": 4.3775770419620275e-07, + "loss": 0.1636962890625, + "step": 13694 + }, + { + "epoch": 0.9257131269433554, + "grad_norm": 0.46444669365882874, + "learning_rate": 4.369667469184102e-07, + "loss": 0.0675201416015625, + "step": 13695 + }, + { + "epoch": 0.9257807219142896, + "grad_norm": 0.4175169765949249, + "learning_rate": 4.3617649429073423e-07, + "loss": 0.07022857666015625, + "step": 13696 + }, + { + "epoch": 0.9258483168852237, + "grad_norm": 0.8173969984054565, + "learning_rate": 4.353869463514093e-07, + "loss": 0.1804046630859375, + "step": 13697 + }, + { + "epoch": 0.9259159118561578, + "grad_norm": 0.6284742951393127, + "learning_rate": 4.3459810313864144e-07, + "loss": 0.1144561767578125, + "step": 13698 + }, + { + "epoch": 0.9259835068270921, + "grad_norm": 1.2057287693023682, + "learning_rate": 4.3380996469059853e-07, + "loss": 0.19171142578125, + "step": 13699 + }, + { + "epoch": 0.9260511017980262, + "grad_norm": 0.2902614176273346, + "learning_rate": 4.33022531045415e-07, + "loss": 0.040142059326171875, + "step": 13700 + }, + { + "epoch": 0.9261186967689604, + "grad_norm": 0.9663886427879333, + "learning_rate": 4.322358022411921e-07, + "loss": 0.13182449340820312, + "step": 13701 + }, + { + "epoch": 0.9261862917398945, + "grad_norm": 0.4984191358089447, + "learning_rate": 4.314497783159976e-07, + "loss": 0.08731842041015625, + "step": 13702 + }, + { + "epoch": 0.9262538867108288, + "grad_norm": 0.38026222586631775, + "learning_rate": 4.306644593078629e-07, + "loss": 0.048511505126953125, + "step": 13703 + }, + { + "epoch": 0.9263214816817629, + "grad_norm": 0.23818908631801605, + "learning_rate": 4.298798452547892e-07, + "loss": 0.04416656494140625, + "step": 13704 + }, + { + "epoch": 0.926389076652697, + "grad_norm": 0.5005627274513245, + "learning_rate": 4.290959361947361e-07, + "loss": 0.09820556640625, + "step": 13705 + }, + { + "epoch": 0.9264566716236312, + "grad_norm": 1.4132611751556396, + "learning_rate": 4.2831273216563995e-07, + "loss": 0.2218017578125, + "step": 13706 + }, + { + "epoch": 0.9265242665945653, + "grad_norm": 1.764533519744873, + "learning_rate": 4.2753023320539375e-07, + "loss": 0.11585235595703125, + "step": 13707 + }, + { + "epoch": 0.9265918615654996, + "grad_norm": 0.5080180168151855, + "learning_rate": 4.267484393518589e-07, + "loss": 0.08843994140625, + "step": 13708 + }, + { + "epoch": 0.9266594565364337, + "grad_norm": 0.7086172103881836, + "learning_rate": 4.2596735064286684e-07, + "loss": 0.11844635009765625, + "step": 13709 + }, + { + "epoch": 0.9267270515073679, + "grad_norm": 0.26619645953178406, + "learning_rate": 4.2518696711620733e-07, + "loss": 0.046611785888671875, + "step": 13710 + }, + { + "epoch": 0.926794646478302, + "grad_norm": 0.4665444791316986, + "learning_rate": 4.2440728880964177e-07, + "loss": 0.086273193359375, + "step": 13711 + }, + { + "epoch": 0.9268622414492361, + "grad_norm": 0.4030393064022064, + "learning_rate": 4.2362831576089834e-07, + "loss": 0.08032989501953125, + "step": 13712 + }, + { + "epoch": 0.9269298364201703, + "grad_norm": 1.1829394102096558, + "learning_rate": 4.2285004800766524e-07, + "loss": 0.10134506225585938, + "step": 13713 + }, + { + "epoch": 0.9269974313911045, + "grad_norm": 0.30251947045326233, + "learning_rate": 4.220724855876007e-07, + "loss": 0.041187286376953125, + "step": 13714 + }, + { + "epoch": 0.9270650263620387, + "grad_norm": 0.3078639507293701, + "learning_rate": 4.212956285383296e-07, + "loss": 0.045909881591796875, + "step": 13715 + }, + { + "epoch": 0.9271326213329728, + "grad_norm": 0.2257702350616455, + "learning_rate": 4.205194768974369e-07, + "loss": 0.025150299072265625, + "step": 13716 + }, + { + "epoch": 0.927200216303907, + "grad_norm": 0.35584908723831177, + "learning_rate": 4.197440307024858e-07, + "loss": 0.04028606414794922, + "step": 13717 + }, + { + "epoch": 0.9272678112748411, + "grad_norm": 0.8112654089927673, + "learning_rate": 4.1896928999098637e-07, + "loss": 0.1373748779296875, + "step": 13718 + }, + { + "epoch": 0.9273354062457753, + "grad_norm": 0.8975051045417786, + "learning_rate": 4.181952548004353e-07, + "loss": 0.10984039306640625, + "step": 13719 + }, + { + "epoch": 0.9274030012167095, + "grad_norm": 1.0395737886428833, + "learning_rate": 4.174219251682809e-07, + "loss": 0.1401519775390625, + "step": 13720 + }, + { + "epoch": 0.9274705961876436, + "grad_norm": 0.7897971272468567, + "learning_rate": 4.1664930113194e-07, + "loss": 0.10236358642578125, + "step": 13721 + }, + { + "epoch": 0.9275381911585778, + "grad_norm": 0.8100347518920898, + "learning_rate": 4.1587738272880093e-07, + "loss": 0.165252685546875, + "step": 13722 + }, + { + "epoch": 0.9276057861295119, + "grad_norm": 0.2963772118091583, + "learning_rate": 4.151061699962089e-07, + "loss": 0.050693511962890625, + "step": 13723 + }, + { + "epoch": 0.9276733811004462, + "grad_norm": 0.24410423636436462, + "learning_rate": 4.143356629714856e-07, + "loss": 0.046367645263671875, + "step": 13724 + }, + { + "epoch": 0.9277409760713803, + "grad_norm": 1.1434155702590942, + "learning_rate": 4.13565861691908e-07, + "loss": 0.15582275390625, + "step": 13725 + }, + { + "epoch": 0.9278085710423144, + "grad_norm": 0.5724903345108032, + "learning_rate": 4.1279676619472796e-07, + "loss": 0.08054351806640625, + "step": 13726 + }, + { + "epoch": 0.9278761660132486, + "grad_norm": 0.22632169723510742, + "learning_rate": 4.120283765171573e-07, + "loss": 0.042690277099609375, + "step": 13727 + }, + { + "epoch": 0.9279437609841827, + "grad_norm": 0.37792885303497314, + "learning_rate": 4.1126069269637465e-07, + "loss": 0.067626953125, + "step": 13728 + }, + { + "epoch": 0.928011355955117, + "grad_norm": 0.6722922921180725, + "learning_rate": 4.104937147695237e-07, + "loss": 0.132598876953125, + "step": 13729 + }, + { + "epoch": 0.9280789509260511, + "grad_norm": 0.26902297139167786, + "learning_rate": 4.0972744277372133e-07, + "loss": 0.04744720458984375, + "step": 13730 + }, + { + "epoch": 0.9281465458969853, + "grad_norm": 0.9997004270553589, + "learning_rate": 4.089618767460396e-07, + "loss": 0.1493682861328125, + "step": 13731 + }, + { + "epoch": 0.9282141408679194, + "grad_norm": 0.7189896106719971, + "learning_rate": 4.0819701672352383e-07, + "loss": 0.14556884765625, + "step": 13732 + }, + { + "epoch": 0.9282817358388535, + "grad_norm": 0.7826860547065735, + "learning_rate": 4.074328627431828e-07, + "loss": 0.14849853515625, + "step": 13733 + }, + { + "epoch": 0.9283493308097878, + "grad_norm": 0.18237030506134033, + "learning_rate": 4.0666941484198693e-07, + "loss": 0.035915374755859375, + "step": 13734 + }, + { + "epoch": 0.9284169257807219, + "grad_norm": 0.8004207015037537, + "learning_rate": 4.0590667305688167e-07, + "loss": 0.173858642578125, + "step": 13735 + }, + { + "epoch": 0.9284845207516561, + "grad_norm": 0.6942763328552246, + "learning_rate": 4.051446374247708e-07, + "loss": 0.11168289184570312, + "step": 13736 + }, + { + "epoch": 0.9285521157225902, + "grad_norm": 0.6697936654090881, + "learning_rate": 4.043833079825282e-07, + "loss": 0.09555625915527344, + "step": 13737 + }, + { + "epoch": 0.9286197106935244, + "grad_norm": 0.3938182592391968, + "learning_rate": 4.036226847669894e-07, + "loss": 0.06534194946289062, + "step": 13738 + }, + { + "epoch": 0.9286873056644586, + "grad_norm": 0.3012564182281494, + "learning_rate": 4.028627678149582e-07, + "loss": 0.050174713134765625, + "step": 13739 + }, + { + "epoch": 0.9287549006353927, + "grad_norm": 0.6074895858764648, + "learning_rate": 4.02103557163207e-07, + "loss": 0.112152099609375, + "step": 13740 + }, + { + "epoch": 0.9288224956063269, + "grad_norm": 0.5991268157958984, + "learning_rate": 4.0134505284846637e-07, + "loss": 0.11269378662109375, + "step": 13741 + }, + { + "epoch": 0.928890090577261, + "grad_norm": 0.9778159260749817, + "learning_rate": 4.0058725490744017e-07, + "loss": 0.1135711669921875, + "step": 13742 + }, + { + "epoch": 0.9289576855481952, + "grad_norm": 0.5874024629592896, + "learning_rate": 3.9983016337679923e-07, + "loss": 0.09439849853515625, + "step": 13743 + }, + { + "epoch": 0.9290252805191294, + "grad_norm": 0.9447702765464783, + "learning_rate": 3.9907377829316915e-07, + "loss": 0.14453887939453125, + "step": 13744 + }, + { + "epoch": 0.9290928754900636, + "grad_norm": 0.4830962121486664, + "learning_rate": 3.983180996931507e-07, + "loss": 0.07786369323730469, + "step": 13745 + }, + { + "epoch": 0.9291604704609977, + "grad_norm": 0.8437790870666504, + "learning_rate": 3.9756312761331293e-07, + "loss": 0.154815673828125, + "step": 13746 + }, + { + "epoch": 0.9292280654319318, + "grad_norm": 0.8953976035118103, + "learning_rate": 3.9680886209017996e-07, + "loss": 0.19976806640625, + "step": 13747 + }, + { + "epoch": 0.929295660402866, + "grad_norm": 0.6971070766448975, + "learning_rate": 3.960553031602526e-07, + "loss": 0.10446548461914062, + "step": 13748 + }, + { + "epoch": 0.9293632553738002, + "grad_norm": 1.2202039957046509, + "learning_rate": 3.953024508599884e-07, + "loss": 0.206146240234375, + "step": 13749 + }, + { + "epoch": 0.9294308503447344, + "grad_norm": 1.8436311483383179, + "learning_rate": 3.945503052258198e-07, + "loss": 0.14942169189453125, + "step": 13750 + }, + { + "epoch": 0.9294984453156685, + "grad_norm": 0.3583624064922333, + "learning_rate": 3.9379886629413606e-07, + "loss": 0.055248260498046875, + "step": 13751 + }, + { + "epoch": 0.9295660402866027, + "grad_norm": 0.7482712268829346, + "learning_rate": 3.9304813410129815e-07, + "loss": 0.1265106201171875, + "step": 13752 + }, + { + "epoch": 0.9296336352575368, + "grad_norm": 0.23915551602840424, + "learning_rate": 3.9229810868363024e-07, + "loss": 0.040130615234375, + "step": 13753 + }, + { + "epoch": 0.929701230228471, + "grad_norm": 0.38826993107795715, + "learning_rate": 3.9154879007742674e-07, + "loss": 0.062896728515625, + "step": 13754 + }, + { + "epoch": 0.9297688251994052, + "grad_norm": 0.7037742733955383, + "learning_rate": 3.908001783189419e-07, + "loss": 0.145599365234375, + "step": 13755 + }, + { + "epoch": 0.9298364201703393, + "grad_norm": 0.4771866798400879, + "learning_rate": 3.9005227344439675e-07, + "loss": 0.0824737548828125, + "step": 13756 + }, + { + "epoch": 0.9299040151412735, + "grad_norm": 0.9161847829818726, + "learning_rate": 3.893050754899807e-07, + "loss": 0.11696624755859375, + "step": 13757 + }, + { + "epoch": 0.9299716101122076, + "grad_norm": 0.17322172224521637, + "learning_rate": 3.8855858449184655e-07, + "loss": 0.017393112182617188, + "step": 13758 + }, + { + "epoch": 0.9300392050831419, + "grad_norm": 0.5497084856033325, + "learning_rate": 3.878128004861187e-07, + "loss": 0.0987091064453125, + "step": 13759 + }, + { + "epoch": 0.930106800054076, + "grad_norm": 0.4836820662021637, + "learning_rate": 3.870677235088765e-07, + "loss": 0.07956695556640625, + "step": 13760 + }, + { + "epoch": 0.9301743950250101, + "grad_norm": 0.39676663279533386, + "learning_rate": 3.863233535961763e-07, + "loss": 0.0842132568359375, + "step": 13761 + }, + { + "epoch": 0.9302419899959443, + "grad_norm": 0.5795385837554932, + "learning_rate": 3.855796907840342e-07, + "loss": 0.08978652954101562, + "step": 13762 + }, + { + "epoch": 0.9303095849668784, + "grad_norm": 1.6969449520111084, + "learning_rate": 3.848367351084331e-07, + "loss": 0.267822265625, + "step": 13763 + }, + { + "epoch": 0.9303771799378127, + "grad_norm": 0.6760218739509583, + "learning_rate": 3.840944866053209e-07, + "loss": 0.10196685791015625, + "step": 13764 + }, + { + "epoch": 0.9304447749087468, + "grad_norm": 0.6287387609481812, + "learning_rate": 3.833529453106138e-07, + "loss": 0.12528228759765625, + "step": 13765 + }, + { + "epoch": 0.930512369879681, + "grad_norm": 0.4811592102050781, + "learning_rate": 3.8261211126018824e-07, + "loss": 0.0611114501953125, + "step": 13766 + }, + { + "epoch": 0.9305799648506151, + "grad_norm": 0.6926131248474121, + "learning_rate": 3.8187198448989714e-07, + "loss": 0.09752273559570312, + "step": 13767 + }, + { + "epoch": 0.9306475598215492, + "grad_norm": 1.1693981885910034, + "learning_rate": 3.811325650355468e-07, + "loss": 0.207733154296875, + "step": 13768 + }, + { + "epoch": 0.9307151547924835, + "grad_norm": 0.5273762345314026, + "learning_rate": 3.803938529329154e-07, + "loss": 0.104095458984375, + "step": 13769 + }, + { + "epoch": 0.9307827497634176, + "grad_norm": 0.6886003613471985, + "learning_rate": 3.796558482177492e-07, + "loss": 0.10718154907226562, + "step": 13770 + }, + { + "epoch": 0.9308503447343518, + "grad_norm": 0.3870975971221924, + "learning_rate": 3.789185509257548e-07, + "loss": 0.0516815185546875, + "step": 13771 + }, + { + "epoch": 0.9309179397052859, + "grad_norm": 0.8168215155601501, + "learning_rate": 3.7818196109261017e-07, + "loss": 0.172760009765625, + "step": 13772 + }, + { + "epoch": 0.9309855346762201, + "grad_norm": 0.6502957940101624, + "learning_rate": 3.7744607875395355e-07, + "loss": 0.1090240478515625, + "step": 13773 + }, + { + "epoch": 0.9310531296471543, + "grad_norm": 0.6229110956192017, + "learning_rate": 3.767109039453914e-07, + "loss": 0.11420440673828125, + "step": 13774 + }, + { + "epoch": 0.9311207246180884, + "grad_norm": 0.2371794581413269, + "learning_rate": 3.759764367024987e-07, + "loss": 0.038562774658203125, + "step": 13775 + }, + { + "epoch": 0.9311883195890226, + "grad_norm": 0.26572948694229126, + "learning_rate": 3.752426770608103e-07, + "loss": 0.03729438781738281, + "step": 13776 + }, + { + "epoch": 0.9312559145599567, + "grad_norm": 1.1247284412384033, + "learning_rate": 3.7450962505583276e-07, + "loss": 0.2105712890625, + "step": 13777 + }, + { + "epoch": 0.9313235095308909, + "grad_norm": 1.1152757406234741, + "learning_rate": 3.737772807230344e-07, + "loss": 0.12982177734375, + "step": 13778 + }, + { + "epoch": 0.931391104501825, + "grad_norm": 0.2670609652996063, + "learning_rate": 3.7304564409785025e-07, + "loss": 0.037506103515625, + "step": 13779 + }, + { + "epoch": 0.9314586994727593, + "grad_norm": 0.3592638671398163, + "learning_rate": 3.7231471521568194e-07, + "loss": 0.04225921630859375, + "step": 13780 + }, + { + "epoch": 0.9315262944436934, + "grad_norm": 0.3950608968734741, + "learning_rate": 3.715844941118962e-07, + "loss": 0.02889871597290039, + "step": 13781 + }, + { + "epoch": 0.9315938894146275, + "grad_norm": 0.8481321334838867, + "learning_rate": 3.708549808218248e-07, + "loss": 0.179595947265625, + "step": 13782 + }, + { + "epoch": 0.9316614843855617, + "grad_norm": 0.5848328471183777, + "learning_rate": 3.701261753807694e-07, + "loss": 0.095367431640625, + "step": 13783 + }, + { + "epoch": 0.9317290793564958, + "grad_norm": 0.4058215618133545, + "learning_rate": 3.693980778239903e-07, + "loss": 0.0784912109375, + "step": 13784 + }, + { + "epoch": 0.9317966743274301, + "grad_norm": 0.3232368230819702, + "learning_rate": 3.686706881867208e-07, + "loss": 0.05747222900390625, + "step": 13785 + }, + { + "epoch": 0.9318642692983642, + "grad_norm": 1.1921459436416626, + "learning_rate": 3.679440065041512e-07, + "loss": 0.1768646240234375, + "step": 13786 + }, + { + "epoch": 0.9319318642692984, + "grad_norm": 0.9465631246566772, + "learning_rate": 3.672180328114483e-07, + "loss": 0.17413330078125, + "step": 13787 + }, + { + "epoch": 0.9319994592402325, + "grad_norm": 1.353044033050537, + "learning_rate": 3.6649276714373734e-07, + "loss": 0.20208740234375, + "step": 13788 + }, + { + "epoch": 0.9320670542111666, + "grad_norm": 1.1266921758651733, + "learning_rate": 3.65768209536112e-07, + "loss": 0.15666961669921875, + "step": 13789 + }, + { + "epoch": 0.9321346491821009, + "grad_norm": 0.3172195255756378, + "learning_rate": 3.650443600236292e-07, + "loss": 0.0538330078125, + "step": 13790 + }, + { + "epoch": 0.932202244153035, + "grad_norm": 0.34142976999282837, + "learning_rate": 3.6432121864131415e-07, + "loss": 0.055469512939453125, + "step": 13791 + }, + { + "epoch": 0.9322698391239692, + "grad_norm": 0.48572707176208496, + "learning_rate": 3.6359878542415735e-07, + "loss": 0.06290054321289062, + "step": 13792 + }, + { + "epoch": 0.9323374340949033, + "grad_norm": 0.7140254974365234, + "learning_rate": 3.628770604071124e-07, + "loss": 0.112579345703125, + "step": 13793 + }, + { + "epoch": 0.9324050290658376, + "grad_norm": 0.9646913409233093, + "learning_rate": 3.621560436251048e-07, + "loss": 0.12024116516113281, + "step": 13794 + }, + { + "epoch": 0.9324726240367717, + "grad_norm": 0.6002296209335327, + "learning_rate": 3.614357351130165e-07, + "loss": 0.11204147338867188, + "step": 13795 + }, + { + "epoch": 0.9325402190077058, + "grad_norm": 0.4237251281738281, + "learning_rate": 3.607161349057064e-07, + "loss": 0.0502471923828125, + "step": 13796 + }, + { + "epoch": 0.93260781397864, + "grad_norm": 0.8577638864517212, + "learning_rate": 3.599972430379883e-07, + "loss": 0.10581207275390625, + "step": 13797 + }, + { + "epoch": 0.9326754089495741, + "grad_norm": 0.47324061393737793, + "learning_rate": 3.5927905954464933e-07, + "loss": 0.0764923095703125, + "step": 13798 + }, + { + "epoch": 0.9327430039205084, + "grad_norm": 0.5697705745697021, + "learning_rate": 3.5856158446044005e-07, + "loss": 0.1020355224609375, + "step": 13799 + }, + { + "epoch": 0.9328105988914425, + "grad_norm": 0.2986949682235718, + "learning_rate": 3.5784481782007605e-07, + "loss": 0.032779693603515625, + "step": 13800 + }, + { + "epoch": 0.9328781938623767, + "grad_norm": 0.7662809491157532, + "learning_rate": 3.571287596582379e-07, + "loss": 0.156707763671875, + "step": 13801 + }, + { + "epoch": 0.9329457888333108, + "grad_norm": 0.7803893089294434, + "learning_rate": 3.5641341000957283e-07, + "loss": 0.1600799560546875, + "step": 13802 + }, + { + "epoch": 0.9330133838042449, + "grad_norm": 0.6898075342178345, + "learning_rate": 3.556987689086949e-07, + "loss": 0.12366485595703125, + "step": 13803 + }, + { + "epoch": 0.9330809787751791, + "grad_norm": 1.1828340291976929, + "learning_rate": 3.549848363901814e-07, + "loss": 0.15245819091796875, + "step": 13804 + }, + { + "epoch": 0.9331485737461133, + "grad_norm": 1.394313931465149, + "learning_rate": 3.5427161248857975e-07, + "loss": 0.1473236083984375, + "step": 13805 + }, + { + "epoch": 0.9332161687170475, + "grad_norm": 0.23246078193187714, + "learning_rate": 3.535590972383973e-07, + "loss": 0.033573150634765625, + "step": 13806 + }, + { + "epoch": 0.9332837636879816, + "grad_norm": 0.2585103213787079, + "learning_rate": 3.528472906741098e-07, + "loss": 0.024206161499023438, + "step": 13807 + }, + { + "epoch": 0.9333513586589158, + "grad_norm": 0.19606001675128937, + "learning_rate": 3.5213619283015977e-07, + "loss": 0.022962570190429688, + "step": 13808 + }, + { + "epoch": 0.93341895362985, + "grad_norm": 0.3879162073135376, + "learning_rate": 3.514258037409579e-07, + "loss": 0.0567626953125, + "step": 13809 + }, + { + "epoch": 0.9334865486007841, + "grad_norm": 0.321851521730423, + "learning_rate": 3.507161234408718e-07, + "loss": 0.05025482177734375, + "step": 13810 + }, + { + "epoch": 0.9335541435717183, + "grad_norm": 0.5378507375717163, + "learning_rate": 3.500071519642423e-07, + "loss": 0.1084747314453125, + "step": 13811 + }, + { + "epoch": 0.9336217385426524, + "grad_norm": 0.6632705330848694, + "learning_rate": 3.4929888934537523e-07, + "loss": 0.08954620361328125, + "step": 13812 + }, + { + "epoch": 0.9336893335135866, + "grad_norm": 0.6208032369613647, + "learning_rate": 3.4859133561853993e-07, + "loss": 0.11368942260742188, + "step": 13813 + }, + { + "epoch": 0.9337569284845207, + "grad_norm": 0.6938086748123169, + "learning_rate": 3.4788449081797236e-07, + "loss": 0.09332513809204102, + "step": 13814 + }, + { + "epoch": 0.933824523455455, + "grad_norm": 0.22843827307224274, + "learning_rate": 3.4717835497787177e-07, + "loss": 0.038692474365234375, + "step": 13815 + }, + { + "epoch": 0.9338921184263891, + "grad_norm": 0.42349761724472046, + "learning_rate": 3.464729281324075e-07, + "loss": 0.082427978515625, + "step": 13816 + }, + { + "epoch": 0.9339597133973232, + "grad_norm": 0.6718153357505798, + "learning_rate": 3.457682103157139e-07, + "loss": 0.1015777587890625, + "step": 13817 + }, + { + "epoch": 0.9340273083682574, + "grad_norm": 0.2181776612997055, + "learning_rate": 3.4506420156188713e-07, + "loss": 0.03621673583984375, + "step": 13818 + }, + { + "epoch": 0.9340949033391915, + "grad_norm": 0.2297561764717102, + "learning_rate": 3.443609019049915e-07, + "loss": 0.033626556396484375, + "step": 13819 + }, + { + "epoch": 0.9341624983101258, + "grad_norm": 0.8092092275619507, + "learning_rate": 3.436583113790598e-07, + "loss": 0.13897705078125, + "step": 13820 + }, + { + "epoch": 0.9342300932810599, + "grad_norm": 0.5626513957977295, + "learning_rate": 3.4295643001808316e-07, + "loss": 0.08516693115234375, + "step": 13821 + }, + { + "epoch": 0.934297688251994, + "grad_norm": 0.21652859449386597, + "learning_rate": 3.4225525785602785e-07, + "loss": 0.036945343017578125, + "step": 13822 + }, + { + "epoch": 0.9343652832229282, + "grad_norm": 0.21695679426193237, + "learning_rate": 3.4155479492681663e-07, + "loss": 0.03148841857910156, + "step": 13823 + }, + { + "epoch": 0.9344328781938623, + "grad_norm": 0.49915310740470886, + "learning_rate": 3.4085504126434575e-07, + "loss": 0.08811187744140625, + "step": 13824 + }, + { + "epoch": 0.9345004731647966, + "grad_norm": 0.4661088287830353, + "learning_rate": 3.4015599690247314e-07, + "loss": 0.06687545776367188, + "step": 13825 + }, + { + "epoch": 0.9345680681357307, + "grad_norm": 0.38218963146209717, + "learning_rate": 3.3945766187502e-07, + "loss": 0.05368804931640625, + "step": 13826 + }, + { + "epoch": 0.9346356631066649, + "grad_norm": 1.1618231534957886, + "learning_rate": 3.3876003621577944e-07, + "loss": 0.1649169921875, + "step": 13827 + }, + { + "epoch": 0.934703258077599, + "grad_norm": 0.3502139449119568, + "learning_rate": 3.38063119958506e-07, + "loss": 0.03846168518066406, + "step": 13828 + }, + { + "epoch": 0.9347708530485331, + "grad_norm": 0.607235312461853, + "learning_rate": 3.3736691313691947e-07, + "loss": 0.08301067352294922, + "step": 13829 + }, + { + "epoch": 0.9348384480194674, + "grad_norm": 0.9327041506767273, + "learning_rate": 3.3667141578470785e-07, + "loss": 0.1647796630859375, + "step": 13830 + }, + { + "epoch": 0.9349060429904015, + "grad_norm": 0.650485634803772, + "learning_rate": 3.3597662793552255e-07, + "loss": 0.1345672607421875, + "step": 13831 + }, + { + "epoch": 0.9349736379613357, + "grad_norm": 0.6070119738578796, + "learning_rate": 3.352825496229833e-07, + "loss": 0.11916351318359375, + "step": 13832 + }, + { + "epoch": 0.9350412329322698, + "grad_norm": 0.4077536463737488, + "learning_rate": 3.3458918088067336e-07, + "loss": 0.079437255859375, + "step": 13833 + }, + { + "epoch": 0.935108827903204, + "grad_norm": 1.0210893154144287, + "learning_rate": 3.338965217421408e-07, + "loss": 0.1398468017578125, + "step": 13834 + }, + { + "epoch": 0.9351764228741382, + "grad_norm": 0.7529950737953186, + "learning_rate": 3.332045722409055e-07, + "loss": 0.11392593383789062, + "step": 13835 + }, + { + "epoch": 0.9352440178450723, + "grad_norm": 0.599876344203949, + "learning_rate": 3.325133324104407e-07, + "loss": 0.1086883544921875, + "step": 13836 + }, + { + "epoch": 0.9353116128160065, + "grad_norm": 0.47647204995155334, + "learning_rate": 3.318228022841996e-07, + "loss": 0.06950187683105469, + "step": 13837 + }, + { + "epoch": 0.9353792077869406, + "grad_norm": 0.7262828946113586, + "learning_rate": 3.3113298189559217e-07, + "loss": 0.116607666015625, + "step": 13838 + }, + { + "epoch": 0.9354468027578748, + "grad_norm": 0.5349005460739136, + "learning_rate": 3.3044387127799335e-07, + "loss": 0.0954437255859375, + "step": 13839 + }, + { + "epoch": 0.935514397728809, + "grad_norm": 0.32372668385505676, + "learning_rate": 3.2975547046474987e-07, + "loss": 0.035884857177734375, + "step": 13840 + }, + { + "epoch": 0.9355819926997432, + "grad_norm": 0.5670439004898071, + "learning_rate": 3.2906777948917165e-07, + "loss": 0.11077117919921875, + "step": 13841 + }, + { + "epoch": 0.9356495876706773, + "grad_norm": 0.8534945845603943, + "learning_rate": 3.283807983845305e-07, + "loss": 0.1512603759765625, + "step": 13842 + }, + { + "epoch": 0.9357171826416114, + "grad_norm": 0.8029483556747437, + "learning_rate": 3.276945271840681e-07, + "loss": 0.13135528564453125, + "step": 13843 + }, + { + "epoch": 0.9357847776125456, + "grad_norm": 0.3082162141799927, + "learning_rate": 3.2700896592098964e-07, + "loss": 0.0684661865234375, + "step": 13844 + }, + { + "epoch": 0.9358523725834798, + "grad_norm": 0.43033406138420105, + "learning_rate": 3.2632411462846845e-07, + "loss": 0.07526397705078125, + "step": 13845 + }, + { + "epoch": 0.935919967554414, + "grad_norm": 1.0496793985366821, + "learning_rate": 3.256399733396431e-07, + "loss": 0.12744140625, + "step": 13846 + }, + { + "epoch": 0.9359875625253481, + "grad_norm": 0.6713554859161377, + "learning_rate": 3.249565420876122e-07, + "loss": 0.10294342041015625, + "step": 13847 + }, + { + "epoch": 0.9360551574962823, + "grad_norm": 0.6627672910690308, + "learning_rate": 3.242738209054491e-07, + "loss": 0.10543060302734375, + "step": 13848 + }, + { + "epoch": 0.9361227524672164, + "grad_norm": 0.7856249809265137, + "learning_rate": 3.235918098261842e-07, + "loss": 0.1473388671875, + "step": 13849 + }, + { + "epoch": 0.9361903474381505, + "grad_norm": 0.4056797921657562, + "learning_rate": 3.229105088828177e-07, + "loss": 0.05130767822265625, + "step": 13850 + }, + { + "epoch": 0.9362579424090848, + "grad_norm": 1.6833561658859253, + "learning_rate": 3.222299181083199e-07, + "loss": 0.1837921142578125, + "step": 13851 + }, + { + "epoch": 0.9363255373800189, + "grad_norm": 1.100171685218811, + "learning_rate": 3.215500375356162e-07, + "loss": 0.186920166015625, + "step": 13852 + }, + { + "epoch": 0.9363931323509531, + "grad_norm": 0.5159316658973694, + "learning_rate": 3.208708671976085e-07, + "loss": 0.071136474609375, + "step": 13853 + }, + { + "epoch": 0.9364607273218872, + "grad_norm": 0.5429823994636536, + "learning_rate": 3.201924071271539e-07, + "loss": 0.07747268676757812, + "step": 13854 + }, + { + "epoch": 0.9365283222928215, + "grad_norm": 0.36442050337791443, + "learning_rate": 3.195146573570862e-07, + "loss": 0.04708099365234375, + "step": 13855 + }, + { + "epoch": 0.9365959172637556, + "grad_norm": 1.1961109638214111, + "learning_rate": 3.1883761792019404e-07, + "loss": 0.1941680908203125, + "step": 13856 + }, + { + "epoch": 0.9366635122346897, + "grad_norm": 0.8371310830116272, + "learning_rate": 3.181612888492413e-07, + "loss": 0.10452461242675781, + "step": 13857 + }, + { + "epoch": 0.9367311072056239, + "grad_norm": 0.31805598735809326, + "learning_rate": 3.174856701769485e-07, + "loss": 0.05336761474609375, + "step": 13858 + }, + { + "epoch": 0.936798702176558, + "grad_norm": 0.5548998713493347, + "learning_rate": 3.168107619360111e-07, + "loss": 0.10790252685546875, + "step": 13859 + }, + { + "epoch": 0.9368662971474923, + "grad_norm": 0.9776374101638794, + "learning_rate": 3.1613656415907967e-07, + "loss": 0.1822967529296875, + "step": 13860 + }, + { + "epoch": 0.9369338921184264, + "grad_norm": 0.3469814956188202, + "learning_rate": 3.154630768787814e-07, + "loss": 0.0581207275390625, + "step": 13861 + }, + { + "epoch": 0.9370014870893606, + "grad_norm": 0.9234762787818909, + "learning_rate": 3.147903001277019e-07, + "loss": 0.16301727294921875, + "step": 13862 + }, + { + "epoch": 0.9370690820602947, + "grad_norm": 0.7311210036277771, + "learning_rate": 3.141182339383919e-07, + "loss": 0.1221923828125, + "step": 13863 + }, + { + "epoch": 0.9371366770312288, + "grad_norm": 0.48762375116348267, + "learning_rate": 3.1344687834337524e-07, + "loss": 0.0904083251953125, + "step": 13864 + }, + { + "epoch": 0.937204272002163, + "grad_norm": 0.6568927764892578, + "learning_rate": 3.127762333751294e-07, + "loss": 0.1248779296875, + "step": 13865 + }, + { + "epoch": 0.9372718669730972, + "grad_norm": 0.9557911157608032, + "learning_rate": 3.1210629906611164e-07, + "loss": 0.1522369384765625, + "step": 13866 + }, + { + "epoch": 0.9373394619440314, + "grad_norm": 0.334881991147995, + "learning_rate": 3.1143707544873277e-07, + "loss": 0.05837249755859375, + "step": 13867 + }, + { + "epoch": 0.9374070569149655, + "grad_norm": 1.0591648817062378, + "learning_rate": 3.107685625553752e-07, + "loss": 0.1848907470703125, + "step": 13868 + }, + { + "epoch": 0.9374746518858997, + "grad_norm": 0.7138329148292542, + "learning_rate": 3.1010076041838643e-07, + "loss": 0.0787973403930664, + "step": 13869 + }, + { + "epoch": 0.9375422468568338, + "grad_norm": 0.3206420838832855, + "learning_rate": 3.0943366907007885e-07, + "loss": 0.0544281005859375, + "step": 13870 + }, + { + "epoch": 0.937609841827768, + "grad_norm": 0.6834675073623657, + "learning_rate": 3.087672885427284e-07, + "loss": 0.120269775390625, + "step": 13871 + }, + { + "epoch": 0.9376774367987022, + "grad_norm": 0.9862921833992004, + "learning_rate": 3.081016188685826e-07, + "loss": 0.168304443359375, + "step": 13872 + }, + { + "epoch": 0.9377450317696363, + "grad_norm": 0.8516219854354858, + "learning_rate": 3.0743666007984727e-07, + "loss": 0.1497039794921875, + "step": 13873 + }, + { + "epoch": 0.9378126267405705, + "grad_norm": 0.557670533657074, + "learning_rate": 3.067724122086968e-07, + "loss": 0.1041107177734375, + "step": 13874 + }, + { + "epoch": 0.9378802217115046, + "grad_norm": 0.33553943037986755, + "learning_rate": 3.06108875287277e-07, + "loss": 0.058498382568359375, + "step": 13875 + }, + { + "epoch": 0.9379478166824389, + "grad_norm": 0.9083653688430786, + "learning_rate": 3.0544604934768573e-07, + "loss": 0.141998291015625, + "step": 13876 + }, + { + "epoch": 0.938015411653373, + "grad_norm": 1.0683836936950684, + "learning_rate": 3.047839344220038e-07, + "loss": 0.11202239990234375, + "step": 13877 + }, + { + "epoch": 0.9380830066243071, + "grad_norm": 0.7366772890090942, + "learning_rate": 3.04122530542259e-07, + "loss": 0.07342147827148438, + "step": 13878 + }, + { + "epoch": 0.9381506015952413, + "grad_norm": 0.7841299176216125, + "learning_rate": 3.034618377404608e-07, + "loss": 0.1483612060546875, + "step": 13879 + }, + { + "epoch": 0.9382181965661754, + "grad_norm": 0.25430211424827576, + "learning_rate": 3.028018560485768e-07, + "loss": 0.043621063232421875, + "step": 13880 + }, + { + "epoch": 0.9382857915371097, + "grad_norm": 0.7437521815299988, + "learning_rate": 3.021425854985382e-07, + "loss": 0.1345672607421875, + "step": 13881 + }, + { + "epoch": 0.9383533865080438, + "grad_norm": 1.811621904373169, + "learning_rate": 3.014840261222479e-07, + "loss": 0.212677001953125, + "step": 13882 + }, + { + "epoch": 0.938420981478978, + "grad_norm": 0.7806892395019531, + "learning_rate": 3.0082617795156865e-07, + "loss": 0.1333465576171875, + "step": 13883 + }, + { + "epoch": 0.9384885764499121, + "grad_norm": 0.7727290391921997, + "learning_rate": 3.0016904101833163e-07, + "loss": 0.1373138427734375, + "step": 13884 + }, + { + "epoch": 0.9385561714208462, + "grad_norm": 0.37090227007865906, + "learning_rate": 2.995126153543348e-07, + "loss": 0.08496856689453125, + "step": 13885 + }, + { + "epoch": 0.9386237663917805, + "grad_norm": 0.5382661819458008, + "learning_rate": 2.9885690099133776e-07, + "loss": 0.0755767822265625, + "step": 13886 + }, + { + "epoch": 0.9386913613627146, + "grad_norm": 1.2155427932739258, + "learning_rate": 2.9820189796106847e-07, + "loss": 0.19378662109375, + "step": 13887 + }, + { + "epoch": 0.9387589563336488, + "grad_norm": 0.700177013874054, + "learning_rate": 2.975476062952248e-07, + "loss": 0.1304473876953125, + "step": 13888 + }, + { + "epoch": 0.9388265513045829, + "grad_norm": 0.2471131831407547, + "learning_rate": 2.9689402602545657e-07, + "loss": 0.048313140869140625, + "step": 13889 + }, + { + "epoch": 0.9388941462755171, + "grad_norm": 0.5744349360466003, + "learning_rate": 2.9624115718339674e-07, + "loss": 0.08551025390625, + "step": 13890 + }, + { + "epoch": 0.9389617412464513, + "grad_norm": 0.7914925217628479, + "learning_rate": 2.9558899980063003e-07, + "loss": 0.108734130859375, + "step": 13891 + }, + { + "epoch": 0.9390293362173854, + "grad_norm": 0.30272597074508667, + "learning_rate": 2.949375539087129e-07, + "loss": 0.056060791015625, + "step": 13892 + }, + { + "epoch": 0.9390969311883196, + "grad_norm": 1.2066367864608765, + "learning_rate": 2.9428681953916683e-07, + "loss": 0.12911033630371094, + "step": 13893 + }, + { + "epoch": 0.9391645261592537, + "grad_norm": 0.30164599418640137, + "learning_rate": 2.9363679672347985e-07, + "loss": 0.03002643585205078, + "step": 13894 + }, + { + "epoch": 0.939232121130188, + "grad_norm": 0.5152181386947632, + "learning_rate": 2.929874854931003e-07, + "loss": 0.0871572494506836, + "step": 13895 + }, + { + "epoch": 0.9392997161011221, + "grad_norm": 0.3232951760292053, + "learning_rate": 2.9233888587944955e-07, + "loss": 0.034064292907714844, + "step": 13896 + }, + { + "epoch": 0.9393673110720563, + "grad_norm": 0.627386748790741, + "learning_rate": 2.9169099791390927e-07, + "loss": 0.103057861328125, + "step": 13897 + }, + { + "epoch": 0.9394349060429904, + "grad_norm": 0.732326090335846, + "learning_rate": 2.9104382162782774e-07, + "loss": 0.10994720458984375, + "step": 13898 + }, + { + "epoch": 0.9395025010139245, + "grad_norm": 1.2052068710327148, + "learning_rate": 2.9039735705252144e-07, + "loss": 0.1352691650390625, + "step": 13899 + }, + { + "epoch": 0.9395700959848587, + "grad_norm": 1.1954008340835571, + "learning_rate": 2.897516042192672e-07, + "loss": 0.1122283935546875, + "step": 13900 + }, + { + "epoch": 0.9396376909557929, + "grad_norm": 0.4688977897167206, + "learning_rate": 2.891065631593148e-07, + "loss": 0.1013641357421875, + "step": 13901 + }, + { + "epoch": 0.9397052859267271, + "grad_norm": 1.2521718740463257, + "learning_rate": 2.884622339038695e-07, + "loss": 0.22357177734375, + "step": 13902 + }, + { + "epoch": 0.9397728808976612, + "grad_norm": 0.32188674807548523, + "learning_rate": 2.878186164841129e-07, + "loss": 0.0648956298828125, + "step": 13903 + }, + { + "epoch": 0.9398404758685954, + "grad_norm": 0.3079216182231903, + "learning_rate": 2.871757109311868e-07, + "loss": 0.040676116943359375, + "step": 13904 + }, + { + "epoch": 0.9399080708395295, + "grad_norm": 1.277836799621582, + "learning_rate": 2.865335172761979e-07, + "loss": 0.19305419921875, + "step": 13905 + }, + { + "epoch": 0.9399756658104637, + "grad_norm": 0.5392205715179443, + "learning_rate": 2.8589203555021815e-07, + "loss": 0.109130859375, + "step": 13906 + }, + { + "epoch": 0.9400432607813979, + "grad_norm": 0.2716379761695862, + "learning_rate": 2.8525126578428927e-07, + "loss": 0.0532989501953125, + "step": 13907 + }, + { + "epoch": 0.940110855752332, + "grad_norm": 0.9875694513320923, + "learning_rate": 2.8461120800941323e-07, + "loss": 0.20526123046875, + "step": 13908 + }, + { + "epoch": 0.9401784507232662, + "grad_norm": 0.6044234037399292, + "learning_rate": 2.839718622565618e-07, + "loss": 0.0938720703125, + "step": 13909 + }, + { + "epoch": 0.9402460456942003, + "grad_norm": 0.9652249813079834, + "learning_rate": 2.8333322855666867e-07, + "loss": 0.168792724609375, + "step": 13910 + }, + { + "epoch": 0.9403136406651346, + "grad_norm": 0.9030604362487793, + "learning_rate": 2.8269530694063737e-07, + "loss": 0.13513946533203125, + "step": 13911 + }, + { + "epoch": 0.9403812356360687, + "grad_norm": 0.5329641699790955, + "learning_rate": 2.8205809743933164e-07, + "loss": 0.10051727294921875, + "step": 13912 + }, + { + "epoch": 0.9404488306070028, + "grad_norm": 0.6408283710479736, + "learning_rate": 2.8142160008358343e-07, + "loss": 0.07148170471191406, + "step": 13913 + }, + { + "epoch": 0.940516425577937, + "grad_norm": 0.4840846061706543, + "learning_rate": 2.807858149041964e-07, + "loss": 0.07209205627441406, + "step": 13914 + }, + { + "epoch": 0.9405840205488711, + "grad_norm": 1.2060941457748413, + "learning_rate": 2.8015074193192605e-07, + "loss": 0.209259033203125, + "step": 13915 + }, + { + "epoch": 0.9406516155198054, + "grad_norm": 0.5828572511672974, + "learning_rate": 2.79516381197506e-07, + "loss": 0.09341812133789062, + "step": 13916 + }, + { + "epoch": 0.9407192104907395, + "grad_norm": 1.1089420318603516, + "learning_rate": 2.7888273273162844e-07, + "loss": 0.152099609375, + "step": 13917 + }, + { + "epoch": 0.9407868054616737, + "grad_norm": 0.7837980389595032, + "learning_rate": 2.7824979656495387e-07, + "loss": 0.092315673828125, + "step": 13918 + }, + { + "epoch": 0.9408544004326078, + "grad_norm": 0.47370022535324097, + "learning_rate": 2.776175727281077e-07, + "loss": 0.094268798828125, + "step": 13919 + }, + { + "epoch": 0.9409219954035419, + "grad_norm": 0.24523243308067322, + "learning_rate": 2.7698606125168223e-07, + "loss": 0.04753875732421875, + "step": 13920 + }, + { + "epoch": 0.9409895903744762, + "grad_norm": 0.5334574580192566, + "learning_rate": 2.763552621662313e-07, + "loss": 0.08456802368164062, + "step": 13921 + }, + { + "epoch": 0.9410571853454103, + "grad_norm": 1.3860849142074585, + "learning_rate": 2.757251755022788e-07, + "loss": 0.13997650146484375, + "step": 13922 + }, + { + "epoch": 0.9411247803163445, + "grad_norm": 1.0683711767196655, + "learning_rate": 2.750958012903121e-07, + "loss": 0.16495513916015625, + "step": 13923 + }, + { + "epoch": 0.9411923752872786, + "grad_norm": 0.6165237426757812, + "learning_rate": 2.7446713956078176e-07, + "loss": 0.111480712890625, + "step": 13924 + }, + { + "epoch": 0.9412599702582128, + "grad_norm": 0.8285195231437683, + "learning_rate": 2.738391903441101e-07, + "loss": 0.156005859375, + "step": 13925 + }, + { + "epoch": 0.941327565229147, + "grad_norm": 0.7471631765365601, + "learning_rate": 2.7321195367067787e-07, + "loss": 0.09467315673828125, + "step": 13926 + }, + { + "epoch": 0.9413951602000811, + "grad_norm": 0.3849690556526184, + "learning_rate": 2.7258542957083744e-07, + "loss": 0.050045013427734375, + "step": 13927 + }, + { + "epoch": 0.9414627551710153, + "grad_norm": 1.660218358039856, + "learning_rate": 2.719596180749012e-07, + "loss": 0.1297016143798828, + "step": 13928 + }, + { + "epoch": 0.9415303501419494, + "grad_norm": 0.30407097935676575, + "learning_rate": 2.713345192131533e-07, + "loss": 0.05037689208984375, + "step": 13929 + }, + { + "epoch": 0.9415979451128836, + "grad_norm": 0.30466508865356445, + "learning_rate": 2.707101330158379e-07, + "loss": 0.042606353759765625, + "step": 13930 + }, + { + "epoch": 0.9416655400838178, + "grad_norm": 0.466526061296463, + "learning_rate": 2.700864595131658e-07, + "loss": 0.1001434326171875, + "step": 13931 + }, + { + "epoch": 0.941733135054752, + "grad_norm": 0.3451281487941742, + "learning_rate": 2.694634987353145e-07, + "loss": 0.0676422119140625, + "step": 13932 + }, + { + "epoch": 0.9418007300256861, + "grad_norm": 0.40444186329841614, + "learning_rate": 2.6884125071242824e-07, + "loss": 0.05657958984375, + "step": 13933 + }, + { + "epoch": 0.9418683249966202, + "grad_norm": 0.4342517554759979, + "learning_rate": 2.6821971547461466e-07, + "loss": 0.0712432861328125, + "step": 13934 + }, + { + "epoch": 0.9419359199675544, + "grad_norm": 0.9268073439598083, + "learning_rate": 2.6759889305194794e-07, + "loss": 0.157135009765625, + "step": 13935 + }, + { + "epoch": 0.9420035149384886, + "grad_norm": 1.7690154314041138, + "learning_rate": 2.6697878347446583e-07, + "loss": 0.235321044921875, + "step": 13936 + }, + { + "epoch": 0.9420711099094228, + "grad_norm": 0.9279530048370361, + "learning_rate": 2.6635938677217255e-07, + "loss": 0.12378692626953125, + "step": 13937 + }, + { + "epoch": 0.9421387048803569, + "grad_norm": 0.19337165355682373, + "learning_rate": 2.657407029750408e-07, + "loss": 0.031585693359375, + "step": 13938 + }, + { + "epoch": 0.9422062998512911, + "grad_norm": 0.21446029841899872, + "learning_rate": 2.651227321130051e-07, + "loss": 0.025241851806640625, + "step": 13939 + }, + { + "epoch": 0.9422738948222252, + "grad_norm": 0.9190371036529541, + "learning_rate": 2.6450547421596807e-07, + "loss": 0.13193511962890625, + "step": 13940 + }, + { + "epoch": 0.9423414897931593, + "grad_norm": 0.8434849977493286, + "learning_rate": 2.638889293137925e-07, + "loss": 0.166259765625, + "step": 13941 + }, + { + "epoch": 0.9424090847640936, + "grad_norm": 0.5176019072532654, + "learning_rate": 2.6327309743631457e-07, + "loss": 0.0776519775390625, + "step": 13942 + }, + { + "epoch": 0.9424766797350277, + "grad_norm": 0.4095512330532074, + "learning_rate": 2.626579786133321e-07, + "loss": 0.0614776611328125, + "step": 13943 + }, + { + "epoch": 0.9425442747059619, + "grad_norm": 0.23340609669685364, + "learning_rate": 2.620435728746062e-07, + "loss": 0.038799285888671875, + "step": 13944 + }, + { + "epoch": 0.942611869676896, + "grad_norm": 1.0799522399902344, + "learning_rate": 2.614298802498666e-07, + "loss": 0.159423828125, + "step": 13945 + }, + { + "epoch": 0.9426794646478303, + "grad_norm": 1.1707035303115845, + "learning_rate": 2.608169007688077e-07, + "loss": 0.187896728515625, + "step": 13946 + }, + { + "epoch": 0.9427470596187644, + "grad_norm": 0.7442922592163086, + "learning_rate": 2.602046344610892e-07, + "loss": 0.135711669921875, + "step": 13947 + }, + { + "epoch": 0.9428146545896985, + "grad_norm": 0.9281613826751709, + "learning_rate": 2.595930813563374e-07, + "loss": 0.13128280639648438, + "step": 13948 + }, + { + "epoch": 0.9428822495606327, + "grad_norm": 0.7721624970436096, + "learning_rate": 2.589822414841403e-07, + "loss": 0.149688720703125, + "step": 13949 + }, + { + "epoch": 0.9429498445315668, + "grad_norm": 0.6747879385948181, + "learning_rate": 2.583721148740542e-07, + "loss": 0.11998748779296875, + "step": 13950 + }, + { + "epoch": 0.943017439502501, + "grad_norm": 0.26973918080329895, + "learning_rate": 2.5776270155560724e-07, + "loss": 0.0494842529296875, + "step": 13951 + }, + { + "epoch": 0.9430850344734352, + "grad_norm": 0.6913848519325256, + "learning_rate": 2.571540015582774e-07, + "loss": 0.1245269775390625, + "step": 13952 + }, + { + "epoch": 0.9431526294443693, + "grad_norm": 0.6594341397285461, + "learning_rate": 2.565460149115245e-07, + "loss": 0.14080810546875, + "step": 13953 + }, + { + "epoch": 0.9432202244153035, + "grad_norm": 0.5289134979248047, + "learning_rate": 2.5593874164476337e-07, + "loss": 0.0711212158203125, + "step": 13954 + }, + { + "epoch": 0.9432878193862376, + "grad_norm": 1.1471407413482666, + "learning_rate": 2.553321817873755e-07, + "loss": 0.193511962890625, + "step": 13955 + }, + { + "epoch": 0.9433554143571719, + "grad_norm": 0.4858635663986206, + "learning_rate": 2.5472633536871736e-07, + "loss": 0.04590415954589844, + "step": 13956 + }, + { + "epoch": 0.943423009328106, + "grad_norm": 0.5375029444694519, + "learning_rate": 2.541212024180939e-07, + "loss": 0.0894622802734375, + "step": 13957 + }, + { + "epoch": 0.9434906042990402, + "grad_norm": 0.8274138569831848, + "learning_rate": 2.5351678296479333e-07, + "loss": 0.1397552490234375, + "step": 13958 + }, + { + "epoch": 0.9435581992699743, + "grad_norm": 1.009204387664795, + "learning_rate": 2.529130770380589e-07, + "loss": 0.15145111083984375, + "step": 13959 + }, + { + "epoch": 0.9436257942409084, + "grad_norm": 0.3549056649208069, + "learning_rate": 2.523100846670989e-07, + "loss": 0.06257247924804688, + "step": 13960 + }, + { + "epoch": 0.9436933892118426, + "grad_norm": 0.6789034605026245, + "learning_rate": 2.517078058810934e-07, + "loss": 0.08531951904296875, + "step": 13961 + }, + { + "epoch": 0.9437609841827768, + "grad_norm": 0.2764016091823578, + "learning_rate": 2.511062407091824e-07, + "loss": 0.04373931884765625, + "step": 13962 + }, + { + "epoch": 0.943828579153711, + "grad_norm": 0.8803733587265015, + "learning_rate": 2.505053891804726e-07, + "loss": 0.13577651977539062, + "step": 13963 + }, + { + "epoch": 0.9438961741246451, + "grad_norm": 0.8231573104858398, + "learning_rate": 2.4990525132404074e-07, + "loss": 0.11373138427734375, + "step": 13964 + }, + { + "epoch": 0.9439637690955793, + "grad_norm": 1.4356948137283325, + "learning_rate": 2.4930582716892024e-07, + "loss": 0.2186279296875, + "step": 13965 + }, + { + "epoch": 0.9440313640665134, + "grad_norm": 1.1651681661605835, + "learning_rate": 2.4870711674411964e-07, + "loss": 0.203582763671875, + "step": 13966 + }, + { + "epoch": 0.9440989590374476, + "grad_norm": 0.4674752652645111, + "learning_rate": 2.4810912007860566e-07, + "loss": 0.085662841796875, + "step": 13967 + }, + { + "epoch": 0.9441665540083818, + "grad_norm": 0.4444107413291931, + "learning_rate": 2.4751183720131185e-07, + "loss": 0.07564735412597656, + "step": 13968 + }, + { + "epoch": 0.9442341489793159, + "grad_norm": 0.22527411580085754, + "learning_rate": 2.4691526814114187e-07, + "loss": 0.024221420288085938, + "step": 13969 + }, + { + "epoch": 0.9443017439502501, + "grad_norm": 0.4634294807910919, + "learning_rate": 2.463194129269575e-07, + "loss": 0.07219696044921875, + "step": 13970 + }, + { + "epoch": 0.9443693389211842, + "grad_norm": 0.5859566926956177, + "learning_rate": 2.4572427158759405e-07, + "loss": 0.0939788818359375, + "step": 13971 + }, + { + "epoch": 0.9444369338921185, + "grad_norm": 0.7617242932319641, + "learning_rate": 2.4512984415184515e-07, + "loss": 0.105865478515625, + "step": 13972 + }, + { + "epoch": 0.9445045288630526, + "grad_norm": 1.2785369157791138, + "learning_rate": 2.445361306484745e-07, + "loss": 0.135009765625, + "step": 13973 + }, + { + "epoch": 0.9445721238339867, + "grad_norm": 0.9329879879951477, + "learning_rate": 2.43943131106209e-07, + "loss": 0.11962890625, + "step": 13974 + }, + { + "epoch": 0.9446397188049209, + "grad_norm": 0.21328414976596832, + "learning_rate": 2.4335084555374256e-07, + "loss": 0.03980255126953125, + "step": 13975 + }, + { + "epoch": 0.944707313775855, + "grad_norm": 0.24945147335529327, + "learning_rate": 2.427592740197304e-07, + "loss": 0.054218292236328125, + "step": 13976 + }, + { + "epoch": 0.9447749087467893, + "grad_norm": 0.7750530242919922, + "learning_rate": 2.4216841653280143e-07, + "loss": 0.165618896484375, + "step": 13977 + }, + { + "epoch": 0.9448425037177234, + "grad_norm": 0.8315625190734863, + "learning_rate": 2.4157827312154103e-07, + "loss": 0.174041748046875, + "step": 13978 + }, + { + "epoch": 0.9449100986886576, + "grad_norm": 0.8298495411872864, + "learning_rate": 2.409888438145047e-07, + "loss": 0.1466217041015625, + "step": 13979 + }, + { + "epoch": 0.9449776936595917, + "grad_norm": 1.5283759832382202, + "learning_rate": 2.404001286402163e-07, + "loss": 0.181610107421875, + "step": 13980 + }, + { + "epoch": 0.9450452886305258, + "grad_norm": 0.7919974327087402, + "learning_rate": 2.3981212762715475e-07, + "loss": 0.15818023681640625, + "step": 13981 + }, + { + "epoch": 0.9451128836014601, + "grad_norm": 0.8624483942985535, + "learning_rate": 2.392248408037773e-07, + "loss": 0.1398468017578125, + "step": 13982 + }, + { + "epoch": 0.9451804785723942, + "grad_norm": 0.4720739722251892, + "learning_rate": 2.386382681984994e-07, + "loss": 0.08313369750976562, + "step": 13983 + }, + { + "epoch": 0.9452480735433284, + "grad_norm": 0.3545345664024353, + "learning_rate": 2.3805240983970023e-07, + "loss": 0.04567587375640869, + "step": 13984 + }, + { + "epoch": 0.9453156685142625, + "grad_norm": 0.7968055009841919, + "learning_rate": 2.374672657557303e-07, + "loss": 0.1186370849609375, + "step": 13985 + }, + { + "epoch": 0.9453832634851967, + "grad_norm": 0.38179177045822144, + "learning_rate": 2.3688283597490035e-07, + "loss": 0.08172607421875, + "step": 13986 + }, + { + "epoch": 0.9454508584561309, + "grad_norm": 0.2440146803855896, + "learning_rate": 2.3629912052548942e-07, + "loss": 0.0469970703125, + "step": 13987 + }, + { + "epoch": 0.945518453427065, + "grad_norm": 0.2463339865207672, + "learning_rate": 2.3571611943574323e-07, + "loss": 0.031429290771484375, + "step": 13988 + }, + { + "epoch": 0.9455860483979992, + "grad_norm": 0.7587789297103882, + "learning_rate": 2.3513383273386756e-07, + "loss": 0.1265869140625, + "step": 13989 + }, + { + "epoch": 0.9456536433689333, + "grad_norm": 0.2745189666748047, + "learning_rate": 2.345522604480399e-07, + "loss": 0.03937530517578125, + "step": 13990 + }, + { + "epoch": 0.9457212383398675, + "grad_norm": 0.24941226840019226, + "learning_rate": 2.3397140260639772e-07, + "loss": 0.03634452819824219, + "step": 13991 + }, + { + "epoch": 0.9457888333108017, + "grad_norm": 0.8924670219421387, + "learning_rate": 2.333912592370485e-07, + "loss": 0.1366119384765625, + "step": 13992 + }, + { + "epoch": 0.9458564282817359, + "grad_norm": 0.9241542816162109, + "learning_rate": 2.3281183036806318e-07, + "loss": 0.14052963256835938, + "step": 13993 + }, + { + "epoch": 0.94592402325267, + "grad_norm": 0.29716411232948303, + "learning_rate": 2.3223311602747765e-07, + "loss": 0.0545501708984375, + "step": 13994 + }, + { + "epoch": 0.9459916182236041, + "grad_norm": 0.5430841445922852, + "learning_rate": 2.3165511624329284e-07, + "loss": 0.084869384765625, + "step": 13995 + }, + { + "epoch": 0.9460592131945383, + "grad_norm": 0.5731961131095886, + "learning_rate": 2.31077831043478e-07, + "loss": 0.0912322998046875, + "step": 13996 + }, + { + "epoch": 0.9461268081654725, + "grad_norm": 0.26900261640548706, + "learning_rate": 2.305012604559642e-07, + "loss": 0.04736328125, + "step": 13997 + }, + { + "epoch": 0.9461944031364067, + "grad_norm": 0.9469571113586426, + "learning_rate": 2.2992540450865075e-07, + "loss": 0.168182373046875, + "step": 13998 + }, + { + "epoch": 0.9462619981073408, + "grad_norm": 0.49601665139198303, + "learning_rate": 2.2935026322939868e-07, + "loss": 0.100860595703125, + "step": 13999 + }, + { + "epoch": 0.946329593078275, + "grad_norm": 0.26082783937454224, + "learning_rate": 2.2877583664604073e-07, + "loss": 0.041388511657714844, + "step": 14000 + }, + { + "epoch": 0.9463971880492091, + "grad_norm": 0.2680260241031647, + "learning_rate": 2.2820212478636804e-07, + "loss": 0.040496826171875, + "step": 14001 + }, + { + "epoch": 0.9464647830201433, + "grad_norm": 1.1951161623001099, + "learning_rate": 2.2762912767814005e-07, + "loss": 0.186859130859375, + "step": 14002 + }, + { + "epoch": 0.9465323779910775, + "grad_norm": 0.2628309726715088, + "learning_rate": 2.2705684534908455e-07, + "loss": 0.036846160888671875, + "step": 14003 + }, + { + "epoch": 0.9465999729620116, + "grad_norm": 0.35140183568000793, + "learning_rate": 2.264852778268911e-07, + "loss": 0.05548095703125, + "step": 14004 + }, + { + "epoch": 0.9466675679329458, + "grad_norm": 0.4754630923271179, + "learning_rate": 2.2591442513921423e-07, + "loss": 0.049968719482421875, + "step": 14005 + }, + { + "epoch": 0.9467351629038799, + "grad_norm": 0.34862789511680603, + "learning_rate": 2.253442873136785e-07, + "loss": 0.0403900146484375, + "step": 14006 + }, + { + "epoch": 0.9468027578748142, + "grad_norm": 1.635172724723816, + "learning_rate": 2.2477486437786522e-07, + "loss": 0.1778717041015625, + "step": 14007 + }, + { + "epoch": 0.9468703528457483, + "grad_norm": 1.1224216222763062, + "learning_rate": 2.2420615635933395e-07, + "loss": 0.181488037109375, + "step": 14008 + }, + { + "epoch": 0.9469379478166824, + "grad_norm": 0.34945088624954224, + "learning_rate": 2.2363816328559605e-07, + "loss": 0.05733489990234375, + "step": 14009 + }, + { + "epoch": 0.9470055427876166, + "grad_norm": 0.26986443996429443, + "learning_rate": 2.2307088518413787e-07, + "loss": 0.044147491455078125, + "step": 14010 + }, + { + "epoch": 0.9470731377585507, + "grad_norm": 1.2824362516403198, + "learning_rate": 2.2250432208240746e-07, + "loss": 0.1275634765625, + "step": 14011 + }, + { + "epoch": 0.947140732729485, + "grad_norm": 0.525834321975708, + "learning_rate": 2.2193847400781786e-07, + "loss": 0.08396148681640625, + "step": 14012 + }, + { + "epoch": 0.9472083277004191, + "grad_norm": 1.0993871688842773, + "learning_rate": 2.2137334098774886e-07, + "loss": 0.1658935546875, + "step": 14013 + }, + { + "epoch": 0.9472759226713533, + "grad_norm": 1.1226032972335815, + "learning_rate": 2.2080892304954525e-07, + "loss": 0.19537353515625, + "step": 14014 + }, + { + "epoch": 0.9473435176422874, + "grad_norm": 0.22223396599292755, + "learning_rate": 2.2024522022051518e-07, + "loss": 0.03213214874267578, + "step": 14015 + }, + { + "epoch": 0.9474111126132215, + "grad_norm": 0.3645428419113159, + "learning_rate": 2.1968223252793683e-07, + "loss": 0.04682159423828125, + "step": 14016 + }, + { + "epoch": 0.9474787075841558, + "grad_norm": 0.9256634712219238, + "learning_rate": 2.191199599990501e-07, + "loss": 0.196014404296875, + "step": 14017 + }, + { + "epoch": 0.9475463025550899, + "grad_norm": 1.559762716293335, + "learning_rate": 2.1855840266105987e-07, + "loss": 0.1356658935546875, + "step": 14018 + }, + { + "epoch": 0.9476138975260241, + "grad_norm": 0.3531901240348816, + "learning_rate": 2.1799756054114106e-07, + "loss": 0.05167388916015625, + "step": 14019 + }, + { + "epoch": 0.9476814924969582, + "grad_norm": 0.6232696175575256, + "learning_rate": 2.1743743366642533e-07, + "loss": 0.09345436096191406, + "step": 14020 + }, + { + "epoch": 0.9477490874678924, + "grad_norm": 1.132768988609314, + "learning_rate": 2.1687802206402097e-07, + "loss": 0.213409423828125, + "step": 14021 + }, + { + "epoch": 0.9478166824388266, + "grad_norm": 0.46876260638237, + "learning_rate": 2.1631932576099135e-07, + "loss": 0.1099853515625, + "step": 14022 + }, + { + "epoch": 0.9478842774097607, + "grad_norm": 0.8014694452285767, + "learning_rate": 2.1576134478437315e-07, + "loss": 0.15814208984375, + "step": 14023 + }, + { + "epoch": 0.9479518723806949, + "grad_norm": 0.6738196611404419, + "learning_rate": 2.1520407916116313e-07, + "loss": 0.08160018920898438, + "step": 14024 + }, + { + "epoch": 0.948019467351629, + "grad_norm": 0.6237020492553711, + "learning_rate": 2.1464752891832473e-07, + "loss": 0.1129608154296875, + "step": 14025 + }, + { + "epoch": 0.9480870623225632, + "grad_norm": 0.365264356136322, + "learning_rate": 2.1409169408278806e-07, + "loss": 0.06835365295410156, + "step": 14026 + }, + { + "epoch": 0.9481546572934973, + "grad_norm": 1.4124383926391602, + "learning_rate": 2.1353657468144828e-07, + "loss": 0.12563133239746094, + "step": 14027 + }, + { + "epoch": 0.9482222522644316, + "grad_norm": 0.6862981915473938, + "learning_rate": 2.129821707411639e-07, + "loss": 0.11738967895507812, + "step": 14028 + }, + { + "epoch": 0.9482898472353657, + "grad_norm": 0.9097564816474915, + "learning_rate": 2.1242848228876177e-07, + "loss": 0.1868438720703125, + "step": 14029 + }, + { + "epoch": 0.9483574422062998, + "grad_norm": 1.0019339323043823, + "learning_rate": 2.1187550935103383e-07, + "loss": 0.11069488525390625, + "step": 14030 + }, + { + "epoch": 0.948425037177234, + "grad_norm": 0.8053301572799683, + "learning_rate": 2.1132325195473367e-07, + "loss": 0.1239013671875, + "step": 14031 + }, + { + "epoch": 0.9484926321481681, + "grad_norm": 0.4720996916294098, + "learning_rate": 2.1077171012658657e-07, + "loss": 0.09624099731445312, + "step": 14032 + }, + { + "epoch": 0.9485602271191024, + "grad_norm": 0.7521361112594604, + "learning_rate": 2.1022088389327455e-07, + "loss": 0.11114501953125, + "step": 14033 + }, + { + "epoch": 0.9486278220900365, + "grad_norm": 0.9869219660758972, + "learning_rate": 2.0967077328145457e-07, + "loss": 0.159271240234375, + "step": 14034 + }, + { + "epoch": 0.9486954170609707, + "grad_norm": 0.9303832054138184, + "learning_rate": 2.0912137831774202e-07, + "loss": 0.12758302688598633, + "step": 14035 + }, + { + "epoch": 0.9487630120319048, + "grad_norm": 0.8714388608932495, + "learning_rate": 2.0857269902872235e-07, + "loss": 0.161865234375, + "step": 14036 + }, + { + "epoch": 0.9488306070028389, + "grad_norm": 0.6381720304489136, + "learning_rate": 2.0802473544094092e-07, + "loss": 0.106414794921875, + "step": 14037 + }, + { + "epoch": 0.9488982019737732, + "grad_norm": 1.3097273111343384, + "learning_rate": 2.0747748758091324e-07, + "loss": 0.22589111328125, + "step": 14038 + }, + { + "epoch": 0.9489657969447073, + "grad_norm": 1.2603598833084106, + "learning_rate": 2.0693095547511808e-07, + "loss": 0.11363983154296875, + "step": 14039 + }, + { + "epoch": 0.9490333919156415, + "grad_norm": 1.158937931060791, + "learning_rate": 2.0638513915000102e-07, + "loss": 0.1785430908203125, + "step": 14040 + }, + { + "epoch": 0.9491009868865756, + "grad_norm": 1.3285750150680542, + "learning_rate": 2.0584003863197086e-07, + "loss": 0.222503662109375, + "step": 14041 + }, + { + "epoch": 0.9491685818575099, + "grad_norm": 0.8657183647155762, + "learning_rate": 2.0529565394740323e-07, + "loss": 0.137725830078125, + "step": 14042 + }, + { + "epoch": 0.949236176828444, + "grad_norm": 0.4616299271583557, + "learning_rate": 2.0475198512264037e-07, + "loss": 0.07306671142578125, + "step": 14043 + }, + { + "epoch": 0.9493037717993781, + "grad_norm": 0.5220292210578918, + "learning_rate": 2.0420903218398457e-07, + "loss": 0.061920166015625, + "step": 14044 + }, + { + "epoch": 0.9493713667703123, + "grad_norm": 1.0526872873306274, + "learning_rate": 2.0366679515771313e-07, + "loss": 0.12409210205078125, + "step": 14045 + }, + { + "epoch": 0.9494389617412464, + "grad_norm": 0.5967249274253845, + "learning_rate": 2.0312527407005677e-07, + "loss": 0.1192779541015625, + "step": 14046 + }, + { + "epoch": 0.9495065567121806, + "grad_norm": 0.9079186916351318, + "learning_rate": 2.0258446894722282e-07, + "loss": 0.12275505065917969, + "step": 14047 + }, + { + "epoch": 0.9495741516831148, + "grad_norm": 0.48658785223960876, + "learning_rate": 2.0204437981537539e-07, + "loss": 0.08168792724609375, + "step": 14048 + }, + { + "epoch": 0.949641746654049, + "grad_norm": 0.9146097898483276, + "learning_rate": 2.0150500670064853e-07, + "loss": 0.11226654052734375, + "step": 14049 + }, + { + "epoch": 0.9497093416249831, + "grad_norm": 1.1622918844223022, + "learning_rate": 2.0096634962913973e-07, + "loss": 0.20758056640625, + "step": 14050 + }, + { + "epoch": 0.9497769365959172, + "grad_norm": 0.6625695824623108, + "learning_rate": 2.0042840862691314e-07, + "loss": 0.09632110595703125, + "step": 14051 + }, + { + "epoch": 0.9498445315668514, + "grad_norm": 0.3137259781360626, + "learning_rate": 1.9989118371999794e-07, + "loss": 0.054195404052734375, + "step": 14052 + }, + { + "epoch": 0.9499121265377856, + "grad_norm": 0.4127853810787201, + "learning_rate": 1.993546749343883e-07, + "loss": 0.07236099243164062, + "step": 14053 + }, + { + "epoch": 0.9499797215087198, + "grad_norm": 0.38322126865386963, + "learning_rate": 1.988188822960435e-07, + "loss": 0.06507110595703125, + "step": 14054 + }, + { + "epoch": 0.9500473164796539, + "grad_norm": 1.5562435388565063, + "learning_rate": 1.982838058308878e-07, + "loss": 0.2052001953125, + "step": 14055 + }, + { + "epoch": 0.9501149114505881, + "grad_norm": 1.5939197540283203, + "learning_rate": 1.9774944556481545e-07, + "loss": 0.13402175903320312, + "step": 14056 + }, + { + "epoch": 0.9501825064215222, + "grad_norm": 1.1218106746673584, + "learning_rate": 1.972158015236758e-07, + "loss": 0.228851318359375, + "step": 14057 + }, + { + "epoch": 0.9502501013924564, + "grad_norm": 0.5189003348350525, + "learning_rate": 1.9668287373329485e-07, + "loss": 0.0796661376953125, + "step": 14058 + }, + { + "epoch": 0.9503176963633906, + "grad_norm": 0.22639161348342896, + "learning_rate": 1.9615066221945865e-07, + "loss": 0.031887054443359375, + "step": 14059 + }, + { + "epoch": 0.9503852913343247, + "grad_norm": 0.8408716917037964, + "learning_rate": 1.9561916700791494e-07, + "loss": 0.12408447265625, + "step": 14060 + }, + { + "epoch": 0.9504528863052589, + "grad_norm": 1.354060173034668, + "learning_rate": 1.9508838812438646e-07, + "loss": 0.15401458740234375, + "step": 14061 + }, + { + "epoch": 0.950520481276193, + "grad_norm": 0.6335620880126953, + "learning_rate": 1.9455832559454934e-07, + "loss": 0.106842041015625, + "step": 14062 + }, + { + "epoch": 0.9505880762471273, + "grad_norm": 0.7913126945495605, + "learning_rate": 1.9402897944405472e-07, + "loss": 0.175384521484375, + "step": 14063 + }, + { + "epoch": 0.9506556712180614, + "grad_norm": 0.4409639239311218, + "learning_rate": 1.9350034969851716e-07, + "loss": 0.06897735595703125, + "step": 14064 + }, + { + "epoch": 0.9507232661889955, + "grad_norm": 0.9139677882194519, + "learning_rate": 1.929724363835128e-07, + "loss": 0.16094970703125, + "step": 14065 + }, + { + "epoch": 0.9507908611599297, + "grad_norm": 0.5039600729942322, + "learning_rate": 1.924452395245846e-07, + "loss": 0.1008148193359375, + "step": 14066 + }, + { + "epoch": 0.9508584561308638, + "grad_norm": 0.23190180957317352, + "learning_rate": 1.9191875914724377e-07, + "loss": 0.04180908203125, + "step": 14067 + }, + { + "epoch": 0.9509260511017981, + "grad_norm": 1.252870798110962, + "learning_rate": 1.913929952769633e-07, + "loss": 0.185638427734375, + "step": 14068 + }, + { + "epoch": 0.9509936460727322, + "grad_norm": 0.8701639175415039, + "learning_rate": 1.9086794793918617e-07, + "loss": 0.11906242370605469, + "step": 14069 + }, + { + "epoch": 0.9510612410436664, + "grad_norm": 0.6499161124229431, + "learning_rate": 1.9034361715931204e-07, + "loss": 0.11517333984375, + "step": 14070 + }, + { + "epoch": 0.9511288360146005, + "grad_norm": 0.471279114484787, + "learning_rate": 1.8982000296271395e-07, + "loss": 0.067962646484375, + "step": 14071 + }, + { + "epoch": 0.9511964309855346, + "grad_norm": 0.823665976524353, + "learning_rate": 1.8929710537472998e-07, + "loss": 0.1027374267578125, + "step": 14072 + }, + { + "epoch": 0.9512640259564689, + "grad_norm": 0.5719529390335083, + "learning_rate": 1.8877492442065492e-07, + "loss": 0.12298583984375, + "step": 14073 + }, + { + "epoch": 0.951331620927403, + "grad_norm": 0.8590548038482666, + "learning_rate": 1.8825346012576183e-07, + "loss": 0.118438720703125, + "step": 14074 + }, + { + "epoch": 0.9513992158983372, + "grad_norm": 0.320117712020874, + "learning_rate": 1.877327125152789e-07, + "loss": 0.0628509521484375, + "step": 14075 + }, + { + "epoch": 0.9514668108692713, + "grad_norm": 1.0670875310897827, + "learning_rate": 1.872126816144043e-07, + "loss": 0.181976318359375, + "step": 14076 + }, + { + "epoch": 0.9515344058402055, + "grad_norm": 0.6442375779151917, + "learning_rate": 1.8669336744829957e-07, + "loss": 0.1081695556640625, + "step": 14077 + }, + { + "epoch": 0.9516020008111397, + "grad_norm": 0.337205171585083, + "learning_rate": 1.8617477004209293e-07, + "loss": 0.042388916015625, + "step": 14078 + }, + { + "epoch": 0.9516695957820738, + "grad_norm": 1.3011770248413086, + "learning_rate": 1.8565688942087768e-07, + "loss": 0.192291259765625, + "step": 14079 + }, + { + "epoch": 0.951737190753008, + "grad_norm": 0.6937622427940369, + "learning_rate": 1.8513972560971037e-07, + "loss": 0.12405014038085938, + "step": 14080 + }, + { + "epoch": 0.9518047857239421, + "grad_norm": 0.6048588752746582, + "learning_rate": 1.846232786336144e-07, + "loss": 0.123992919921875, + "step": 14081 + }, + { + "epoch": 0.9518723806948763, + "grad_norm": 0.6543285846710205, + "learning_rate": 1.8410754851758305e-07, + "loss": 0.11406135559082031, + "step": 14082 + }, + { + "epoch": 0.9519399756658105, + "grad_norm": 0.3637307286262512, + "learning_rate": 1.835925352865664e-07, + "loss": 0.04491901397705078, + "step": 14083 + }, + { + "epoch": 0.9520075706367446, + "grad_norm": 0.5349791646003723, + "learning_rate": 1.8307823896548448e-07, + "loss": 0.1032562255859375, + "step": 14084 + }, + { + "epoch": 0.9520751656076788, + "grad_norm": 0.46007025241851807, + "learning_rate": 1.8256465957922408e-07, + "loss": 0.07517242431640625, + "step": 14085 + }, + { + "epoch": 0.9521427605786129, + "grad_norm": 0.9445479512214661, + "learning_rate": 1.8205179715263197e-07, + "loss": 0.10972213745117188, + "step": 14086 + }, + { + "epoch": 0.9522103555495471, + "grad_norm": 0.4694124758243561, + "learning_rate": 1.8153965171052832e-07, + "loss": 0.10064697265625, + "step": 14087 + }, + { + "epoch": 0.9522779505204813, + "grad_norm": 0.6534810662269592, + "learning_rate": 1.8102822327768997e-07, + "loss": 0.10645294189453125, + "step": 14088 + }, + { + "epoch": 0.9523455454914155, + "grad_norm": 0.720027506351471, + "learning_rate": 1.8051751187886379e-07, + "loss": 0.1378326416015625, + "step": 14089 + }, + { + "epoch": 0.9524131404623496, + "grad_norm": 0.4672004282474518, + "learning_rate": 1.8000751753876333e-07, + "loss": 0.086944580078125, + "step": 14090 + }, + { + "epoch": 0.9524807354332837, + "grad_norm": 0.2972787320613861, + "learning_rate": 1.794982402820622e-07, + "loss": 0.0389862060546875, + "step": 14091 + }, + { + "epoch": 0.9525483304042179, + "grad_norm": 0.3159223794937134, + "learning_rate": 1.7898968013340567e-07, + "loss": 0.047885894775390625, + "step": 14092 + }, + { + "epoch": 0.952615925375152, + "grad_norm": 0.642928421497345, + "learning_rate": 1.784818371173974e-07, + "loss": 0.107879638671875, + "step": 14093 + }, + { + "epoch": 0.9526835203460863, + "grad_norm": 1.3273602724075317, + "learning_rate": 1.7797471125861275e-07, + "loss": 0.151458740234375, + "step": 14094 + }, + { + "epoch": 0.9527511153170204, + "grad_norm": 0.8634501099586487, + "learning_rate": 1.7746830258158875e-07, + "loss": 0.14246368408203125, + "step": 14095 + }, + { + "epoch": 0.9528187102879546, + "grad_norm": 0.9321761727333069, + "learning_rate": 1.769626111108291e-07, + "loss": 0.12804412841796875, + "step": 14096 + }, + { + "epoch": 0.9528863052588887, + "grad_norm": 0.370995432138443, + "learning_rate": 1.7645763687080096e-07, + "loss": 0.06622695922851562, + "step": 14097 + }, + { + "epoch": 0.9529539002298228, + "grad_norm": 0.6041760444641113, + "learning_rate": 1.7595337988593972e-07, + "loss": 0.11492156982421875, + "step": 14098 + }, + { + "epoch": 0.9530214952007571, + "grad_norm": 0.9342883229255676, + "learning_rate": 1.754498401806426e-07, + "loss": 0.181884765625, + "step": 14099 + }, + { + "epoch": 0.9530890901716912, + "grad_norm": 2.0998470783233643, + "learning_rate": 1.7494701777927668e-07, + "loss": 0.183563232421875, + "step": 14100 + }, + { + "epoch": 0.9531566851426254, + "grad_norm": 0.38613161444664, + "learning_rate": 1.7444491270616925e-07, + "loss": 0.07868194580078125, + "step": 14101 + }, + { + "epoch": 0.9532242801135595, + "grad_norm": 1.330244779586792, + "learning_rate": 1.7394352498561583e-07, + "loss": 0.17620849609375, + "step": 14102 + }, + { + "epoch": 0.9532918750844938, + "grad_norm": 0.4716056287288666, + "learning_rate": 1.7344285464187704e-07, + "loss": 0.0700998306274414, + "step": 14103 + }, + { + "epoch": 0.9533594700554279, + "grad_norm": 1.769161343574524, + "learning_rate": 1.7294290169917848e-07, + "loss": 0.17572021484375, + "step": 14104 + }, + { + "epoch": 0.953427065026362, + "grad_norm": 1.200459599494934, + "learning_rate": 1.7244366618170915e-07, + "loss": 0.1661224365234375, + "step": 14105 + }, + { + "epoch": 0.9534946599972962, + "grad_norm": 0.5235292911529541, + "learning_rate": 1.719451481136297e-07, + "loss": 0.1058807373046875, + "step": 14106 + }, + { + "epoch": 0.9535622549682303, + "grad_norm": 0.5134308934211731, + "learning_rate": 1.7144734751905756e-07, + "loss": 0.07596969604492188, + "step": 14107 + }, + { + "epoch": 0.9536298499391646, + "grad_norm": 1.6348072290420532, + "learning_rate": 1.7095026442208005e-07, + "loss": 0.1934814453125, + "step": 14108 + }, + { + "epoch": 0.9536974449100987, + "grad_norm": 1.2924737930297852, + "learning_rate": 1.7045389884674967e-07, + "loss": 0.18121337890625, + "step": 14109 + }, + { + "epoch": 0.9537650398810329, + "grad_norm": 0.6402224898338318, + "learning_rate": 1.6995825081708382e-07, + "loss": 0.08350372314453125, + "step": 14110 + }, + { + "epoch": 0.953832634851967, + "grad_norm": 0.522095799446106, + "learning_rate": 1.6946332035706503e-07, + "loss": 0.09152793884277344, + "step": 14111 + }, + { + "epoch": 0.9539002298229011, + "grad_norm": 0.31564682722091675, + "learning_rate": 1.6896910749064076e-07, + "loss": 0.05921173095703125, + "step": 14112 + }, + { + "epoch": 0.9539678247938354, + "grad_norm": 0.36290445923805237, + "learning_rate": 1.684756122417236e-07, + "loss": 0.04850578308105469, + "step": 14113 + }, + { + "epoch": 0.9540354197647695, + "grad_norm": 0.9234908819198608, + "learning_rate": 1.6798283463419274e-07, + "loss": 0.195526123046875, + "step": 14114 + }, + { + "epoch": 0.9541030147357037, + "grad_norm": 1.0902270078659058, + "learning_rate": 1.6749077469189243e-07, + "loss": 0.14232635498046875, + "step": 14115 + }, + { + "epoch": 0.9541706097066378, + "grad_norm": 0.34152403473854065, + "learning_rate": 1.6699943243863202e-07, + "loss": 0.0435638427734375, + "step": 14116 + }, + { + "epoch": 0.954238204677572, + "grad_norm": 0.8159732222557068, + "learning_rate": 1.6650880789818245e-07, + "loss": 0.1353302001953125, + "step": 14117 + }, + { + "epoch": 0.9543057996485061, + "grad_norm": 0.7577801942825317, + "learning_rate": 1.6601890109428642e-07, + "loss": 0.1101226806640625, + "step": 14118 + }, + { + "epoch": 0.9543733946194403, + "grad_norm": 0.45312878489494324, + "learning_rate": 1.655297120506466e-07, + "loss": 0.05680656433105469, + "step": 14119 + }, + { + "epoch": 0.9544409895903745, + "grad_norm": 0.8511251211166382, + "learning_rate": 1.6504124079093409e-07, + "loss": 0.1547088623046875, + "step": 14120 + }, + { + "epoch": 0.9545085845613086, + "grad_norm": 0.5965701341629028, + "learning_rate": 1.645534873387833e-07, + "loss": 0.09070587158203125, + "step": 14121 + }, + { + "epoch": 0.9545761795322428, + "grad_norm": 0.6650394201278687, + "learning_rate": 1.64066451717797e-07, + "loss": 0.116058349609375, + "step": 14122 + }, + { + "epoch": 0.954643774503177, + "grad_norm": 0.3985464572906494, + "learning_rate": 1.6358013395153804e-07, + "loss": 0.06728363037109375, + "step": 14123 + }, + { + "epoch": 0.9547113694741112, + "grad_norm": 0.8158965110778809, + "learning_rate": 1.6309453406354091e-07, + "loss": 0.13933563232421875, + "step": 14124 + }, + { + "epoch": 0.9547789644450453, + "grad_norm": 1.1939408779144287, + "learning_rate": 1.6260965207729685e-07, + "loss": 0.13637161254882812, + "step": 14125 + }, + { + "epoch": 0.9548465594159794, + "grad_norm": 0.335224449634552, + "learning_rate": 1.6212548801627203e-07, + "loss": 0.056060791015625, + "step": 14126 + }, + { + "epoch": 0.9549141543869136, + "grad_norm": 0.2467084378004074, + "learning_rate": 1.6164204190389276e-07, + "loss": 0.04120826721191406, + "step": 14127 + }, + { + "epoch": 0.9549817493578477, + "grad_norm": 0.5222440958023071, + "learning_rate": 1.611593137635503e-07, + "loss": 0.10924530029296875, + "step": 14128 + }, + { + "epoch": 0.955049344328782, + "grad_norm": 0.2604812979698181, + "learning_rate": 1.60677303618601e-07, + "loss": 0.0391082763671875, + "step": 14129 + }, + { + "epoch": 0.9551169392997161, + "grad_norm": 0.28760313987731934, + "learning_rate": 1.6019601149236784e-07, + "loss": 0.040676116943359375, + "step": 14130 + }, + { + "epoch": 0.9551845342706503, + "grad_norm": 0.5337928533554077, + "learning_rate": 1.5971543740814054e-07, + "loss": 0.072052001953125, + "step": 14131 + }, + { + "epoch": 0.9552521292415844, + "grad_norm": 1.0828897953033447, + "learning_rate": 1.5923558138917215e-07, + "loss": 0.160491943359375, + "step": 14132 + }, + { + "epoch": 0.9553197242125185, + "grad_norm": 0.886117160320282, + "learning_rate": 1.5875644345867913e-07, + "loss": 0.11301231384277344, + "step": 14133 + }, + { + "epoch": 0.9553873191834528, + "grad_norm": 0.8331903219223022, + "learning_rate": 1.5827802363984457e-07, + "loss": 0.0894775390625, + "step": 14134 + }, + { + "epoch": 0.9554549141543869, + "grad_norm": 0.19643263518810272, + "learning_rate": 1.5780032195582162e-07, + "loss": 0.029632568359375, + "step": 14135 + }, + { + "epoch": 0.9555225091253211, + "grad_norm": 0.7823091745376587, + "learning_rate": 1.5732333842971847e-07, + "loss": 0.13042831420898438, + "step": 14136 + }, + { + "epoch": 0.9555901040962552, + "grad_norm": 1.4762192964553833, + "learning_rate": 1.5684707308462e-07, + "loss": 0.224395751953125, + "step": 14137 + }, + { + "epoch": 0.9556576990671894, + "grad_norm": 0.5023802518844604, + "learning_rate": 1.5637152594356775e-07, + "loss": 0.0792236328125, + "step": 14138 + }, + { + "epoch": 0.9557252940381236, + "grad_norm": 0.35665053129196167, + "learning_rate": 1.5589669702957333e-07, + "loss": 0.0552825927734375, + "step": 14139 + }, + { + "epoch": 0.9557928890090577, + "grad_norm": 0.3472631275653839, + "learning_rate": 1.554225863656117e-07, + "loss": 0.0481719970703125, + "step": 14140 + }, + { + "epoch": 0.9558604839799919, + "grad_norm": 0.2738996148109436, + "learning_rate": 1.5494919397462282e-07, + "loss": 0.03958892822265625, + "step": 14141 + }, + { + "epoch": 0.955928078950926, + "grad_norm": 1.1388251781463623, + "learning_rate": 1.5447651987951006e-07, + "loss": 0.1658477783203125, + "step": 14142 + }, + { + "epoch": 0.9559956739218602, + "grad_norm": 1.0130915641784668, + "learning_rate": 1.5400456410314846e-07, + "loss": 0.1927490234375, + "step": 14143 + }, + { + "epoch": 0.9560632688927944, + "grad_norm": 0.8267056345939636, + "learning_rate": 1.5353332666837305e-07, + "loss": 0.12502288818359375, + "step": 14144 + }, + { + "epoch": 0.9561308638637286, + "grad_norm": 0.7596327662467957, + "learning_rate": 1.530628075979823e-07, + "loss": 0.111541748046875, + "step": 14145 + }, + { + "epoch": 0.9561984588346627, + "grad_norm": 0.5488742589950562, + "learning_rate": 1.5259300691474631e-07, + "loss": 0.0697174072265625, + "step": 14146 + }, + { + "epoch": 0.9562660538055968, + "grad_norm": 0.5214700698852539, + "learning_rate": 1.5212392464139525e-07, + "loss": 0.0930023193359375, + "step": 14147 + }, + { + "epoch": 0.956333648776531, + "grad_norm": 1.01961350440979, + "learning_rate": 1.516555608006276e-07, + "loss": 0.154510498046875, + "step": 14148 + }, + { + "epoch": 0.9564012437474652, + "grad_norm": 0.9351374506950378, + "learning_rate": 1.5118791541510358e-07, + "loss": 0.13955307006835938, + "step": 14149 + }, + { + "epoch": 0.9564688387183994, + "grad_norm": 0.27710339426994324, + "learning_rate": 1.5072098850745341e-07, + "loss": 0.035861968994140625, + "step": 14150 + }, + { + "epoch": 0.9565364336893335, + "grad_norm": 0.601775586605072, + "learning_rate": 1.5025478010026738e-07, + "loss": 0.09292221069335938, + "step": 14151 + }, + { + "epoch": 0.9566040286602677, + "grad_norm": 0.3547622561454773, + "learning_rate": 1.497892902161041e-07, + "loss": 0.05399322509765625, + "step": 14152 + }, + { + "epoch": 0.9566716236312018, + "grad_norm": 0.9475136399269104, + "learning_rate": 1.4932451887748888e-07, + "loss": 0.16162109375, + "step": 14153 + }, + { + "epoch": 0.956739218602136, + "grad_norm": 0.6587855219841003, + "learning_rate": 1.4886046610690873e-07, + "loss": 0.10095977783203125, + "step": 14154 + }, + { + "epoch": 0.9568068135730702, + "grad_norm": 0.45301467180252075, + "learning_rate": 1.483971319268157e-07, + "loss": 0.07108306884765625, + "step": 14155 + }, + { + "epoch": 0.9568744085440043, + "grad_norm": 0.7521874904632568, + "learning_rate": 1.4793451635963184e-07, + "loss": 0.1041107177734375, + "step": 14156 + }, + { + "epoch": 0.9569420035149385, + "grad_norm": 0.3290307819843292, + "learning_rate": 1.4747261942774093e-07, + "loss": 0.055938720703125, + "step": 14157 + }, + { + "epoch": 0.9570095984858726, + "grad_norm": 0.2574670910835266, + "learning_rate": 1.4701144115349008e-07, + "loss": 0.04386138916015625, + "step": 14158 + }, + { + "epoch": 0.9570771934568069, + "grad_norm": 0.8150880336761475, + "learning_rate": 1.4655098155919644e-07, + "loss": 0.1283111572265625, + "step": 14159 + }, + { + "epoch": 0.957144788427741, + "grad_norm": 0.9394015073776245, + "learning_rate": 1.4609124066713718e-07, + "loss": 0.13898849487304688, + "step": 14160 + }, + { + "epoch": 0.9572123833986751, + "grad_norm": 0.2834433615207672, + "learning_rate": 1.456322184995612e-07, + "loss": 0.07306671142578125, + "step": 14161 + }, + { + "epoch": 0.9572799783696093, + "grad_norm": 0.45191314816474915, + "learning_rate": 1.451739150786757e-07, + "loss": 0.0881805419921875, + "step": 14162 + }, + { + "epoch": 0.9573475733405434, + "grad_norm": 0.22157147526741028, + "learning_rate": 1.44716330426658e-07, + "loss": 0.022403717041015625, + "step": 14163 + }, + { + "epoch": 0.9574151683114777, + "grad_norm": 0.35678571462631226, + "learning_rate": 1.4425946456564864e-07, + "loss": 0.0789947509765625, + "step": 14164 + }, + { + "epoch": 0.9574827632824118, + "grad_norm": 0.284540593624115, + "learning_rate": 1.4380331751775166e-07, + "loss": 0.05432891845703125, + "step": 14165 + }, + { + "epoch": 0.957550358253346, + "grad_norm": 0.35786452889442444, + "learning_rate": 1.4334788930504273e-07, + "loss": 0.072265625, + "step": 14166 + }, + { + "epoch": 0.9576179532242801, + "grad_norm": 0.6289003491401672, + "learning_rate": 1.4289317994955254e-07, + "loss": 0.1318206787109375, + "step": 14167 + }, + { + "epoch": 0.9576855481952142, + "grad_norm": 0.942517101764679, + "learning_rate": 1.4243918947328683e-07, + "loss": 0.12921905517578125, + "step": 14168 + }, + { + "epoch": 0.9577531431661485, + "grad_norm": 0.39720192551612854, + "learning_rate": 1.419859178982097e-07, + "loss": 0.06890869140625, + "step": 14169 + }, + { + "epoch": 0.9578207381370826, + "grad_norm": 0.30795156955718994, + "learning_rate": 1.4153336524625694e-07, + "loss": 0.03632926940917969, + "step": 14170 + }, + { + "epoch": 0.9578883331080168, + "grad_norm": 0.6204416155815125, + "learning_rate": 1.4108153153932267e-07, + "loss": 0.07946014404296875, + "step": 14171 + }, + { + "epoch": 0.9579559280789509, + "grad_norm": 0.38018810749053955, + "learning_rate": 1.4063041679927112e-07, + "loss": 0.048980712890625, + "step": 14172 + }, + { + "epoch": 0.9580235230498851, + "grad_norm": 0.5206217169761658, + "learning_rate": 1.401800210479298e-07, + "loss": 0.10860443115234375, + "step": 14173 + }, + { + "epoch": 0.9580911180208193, + "grad_norm": 0.20966829359531403, + "learning_rate": 1.3973034430709132e-07, + "loss": 0.027378082275390625, + "step": 14174 + }, + { + "epoch": 0.9581587129917534, + "grad_norm": 0.4060481786727905, + "learning_rate": 1.3928138659851153e-07, + "loss": 0.07053756713867188, + "step": 14175 + }, + { + "epoch": 0.9582263079626876, + "grad_norm": 0.6338616609573364, + "learning_rate": 1.388331479439181e-07, + "loss": 0.10959625244140625, + "step": 14176 + }, + { + "epoch": 0.9582939029336217, + "grad_norm": 0.30919936299324036, + "learning_rate": 1.3838562836499868e-07, + "loss": 0.03967714309692383, + "step": 14177 + }, + { + "epoch": 0.9583614979045559, + "grad_norm": 1.010095238685608, + "learning_rate": 1.379388278834043e-07, + "loss": 0.1208038330078125, + "step": 14178 + }, + { + "epoch": 0.95842909287549, + "grad_norm": 0.5115087032318115, + "learning_rate": 1.3749274652075594e-07, + "loss": 0.08087158203125, + "step": 14179 + }, + { + "epoch": 0.9584966878464243, + "grad_norm": 0.5506428480148315, + "learning_rate": 1.3704738429863638e-07, + "loss": 0.09586334228515625, + "step": 14180 + }, + { + "epoch": 0.9585642828173584, + "grad_norm": 0.49462395906448364, + "learning_rate": 1.3660274123859672e-07, + "loss": 0.04680633544921875, + "step": 14181 + }, + { + "epoch": 0.9586318777882925, + "grad_norm": 0.26995837688446045, + "learning_rate": 1.3615881736215142e-07, + "loss": 0.034511566162109375, + "step": 14182 + }, + { + "epoch": 0.9586994727592267, + "grad_norm": 0.9045244455337524, + "learning_rate": 1.3571561269077992e-07, + "loss": 0.1468353271484375, + "step": 14183 + }, + { + "epoch": 0.9587670677301608, + "grad_norm": 0.6992756724357605, + "learning_rate": 1.3527312724592679e-07, + "loss": 0.097259521484375, + "step": 14184 + }, + { + "epoch": 0.9588346627010951, + "grad_norm": 0.3882996439933777, + "learning_rate": 1.3483136104900317e-07, + "loss": 0.084686279296875, + "step": 14185 + }, + { + "epoch": 0.9589022576720292, + "grad_norm": 1.5344867706298828, + "learning_rate": 1.3439031412138202e-07, + "loss": 0.212005615234375, + "step": 14186 + }, + { + "epoch": 0.9589698526429634, + "grad_norm": 0.40265387296676636, + "learning_rate": 1.3394998648440793e-07, + "loss": 0.054813385009765625, + "step": 14187 + }, + { + "epoch": 0.9590374476138975, + "grad_norm": 0.27720558643341064, + "learning_rate": 1.3351037815938384e-07, + "loss": 0.04937744140625, + "step": 14188 + }, + { + "epoch": 0.9591050425848316, + "grad_norm": 1.0790796279907227, + "learning_rate": 1.330714891675794e-07, + "loss": 0.14705657958984375, + "step": 14189 + }, + { + "epoch": 0.9591726375557659, + "grad_norm": 1.384078860282898, + "learning_rate": 1.3263331953023594e-07, + "loss": 0.174407958984375, + "step": 14190 + }, + { + "epoch": 0.9592402325267, + "grad_norm": 0.4790206551551819, + "learning_rate": 1.321958692685482e-07, + "loss": 0.079620361328125, + "step": 14191 + }, + { + "epoch": 0.9593078274976342, + "grad_norm": 0.4029756486415863, + "learning_rate": 1.317591384036876e-07, + "loss": 0.05651092529296875, + "step": 14192 + }, + { + "epoch": 0.9593754224685683, + "grad_norm": 0.4186137616634369, + "learning_rate": 1.313231269567855e-07, + "loss": 0.06515121459960938, + "step": 14193 + }, + { + "epoch": 0.9594430174395026, + "grad_norm": 1.3819304704666138, + "learning_rate": 1.3088783494893674e-07, + "loss": 0.2080078125, + "step": 14194 + }, + { + "epoch": 0.9595106124104367, + "grad_norm": 0.4996911287307739, + "learning_rate": 1.3045326240120447e-07, + "loss": 0.0989227294921875, + "step": 14195 + }, + { + "epoch": 0.9595782073813708, + "grad_norm": 0.5848492980003357, + "learning_rate": 1.3001940933461687e-07, + "loss": 0.09967041015625, + "step": 14196 + }, + { + "epoch": 0.959645802352305, + "grad_norm": 1.0794274806976318, + "learning_rate": 1.2958627577016547e-07, + "loss": 0.11638259887695312, + "step": 14197 + }, + { + "epoch": 0.9597133973232391, + "grad_norm": 1.1612141132354736, + "learning_rate": 1.291538617288085e-07, + "loss": 0.1273651123046875, + "step": 14198 + }, + { + "epoch": 0.9597809922941734, + "grad_norm": 0.6822466254234314, + "learning_rate": 1.2872216723146756e-07, + "loss": 0.11972808837890625, + "step": 14199 + }, + { + "epoch": 0.9598485872651075, + "grad_norm": 0.7819499373435974, + "learning_rate": 1.2829119229903262e-07, + "loss": 0.177032470703125, + "step": 14200 + }, + { + "epoch": 0.9599161822360417, + "grad_norm": 0.3124453127384186, + "learning_rate": 1.2786093695235534e-07, + "loss": 0.04136371612548828, + "step": 14201 + }, + { + "epoch": 0.9599837772069758, + "grad_norm": 0.8994783163070679, + "learning_rate": 1.2743140121225404e-07, + "loss": 0.184906005859375, + "step": 14202 + }, + { + "epoch": 0.9600513721779099, + "grad_norm": 0.5859221816062927, + "learning_rate": 1.2700258509951546e-07, + "loss": 0.11156082153320312, + "step": 14203 + }, + { + "epoch": 0.9601189671488441, + "grad_norm": 0.5654280781745911, + "learning_rate": 1.2657448863488296e-07, + "loss": 0.12152099609375, + "step": 14204 + }, + { + "epoch": 0.9601865621197783, + "grad_norm": 1.0787409543991089, + "learning_rate": 1.2614711183907502e-07, + "loss": 0.10692977905273438, + "step": 14205 + }, + { + "epoch": 0.9602541570907125, + "grad_norm": 0.7693229913711548, + "learning_rate": 1.2572045473276838e-07, + "loss": 0.0854339599609375, + "step": 14206 + }, + { + "epoch": 0.9603217520616466, + "grad_norm": 0.3169393241405487, + "learning_rate": 1.252945173366099e-07, + "loss": 0.0473480224609375, + "step": 14207 + }, + { + "epoch": 0.9603893470325808, + "grad_norm": 0.49583715200424194, + "learning_rate": 1.248692996712064e-07, + "loss": 0.09202194213867188, + "step": 14208 + }, + { + "epoch": 0.960456942003515, + "grad_norm": 1.0396515130996704, + "learning_rate": 1.244448017571348e-07, + "loss": 0.1630096435546875, + "step": 14209 + }, + { + "epoch": 0.9605245369744491, + "grad_norm": 0.84026038646698, + "learning_rate": 1.2402102361493194e-07, + "loss": 0.1450958251953125, + "step": 14210 + }, + { + "epoch": 0.9605921319453833, + "grad_norm": 0.3497805893421173, + "learning_rate": 1.2359796526510646e-07, + "loss": 0.0735626220703125, + "step": 14211 + }, + { + "epoch": 0.9606597269163174, + "grad_norm": 0.4026927947998047, + "learning_rate": 1.23175626728127e-07, + "loss": 0.06427764892578125, + "step": 14212 + }, + { + "epoch": 0.9607273218872516, + "grad_norm": 0.9298023581504822, + "learning_rate": 1.2275400802442715e-07, + "loss": 0.15612030029296875, + "step": 14213 + }, + { + "epoch": 0.9607949168581857, + "grad_norm": 0.7603786587715149, + "learning_rate": 1.22333109174409e-07, + "loss": 0.141998291015625, + "step": 14214 + }, + { + "epoch": 0.9608625118291199, + "grad_norm": 0.6764287948608398, + "learning_rate": 1.2191293019843785e-07, + "loss": 0.1063995361328125, + "step": 14215 + }, + { + "epoch": 0.9609301068000541, + "grad_norm": 0.9788329601287842, + "learning_rate": 1.214934711168475e-07, + "loss": 0.13733291625976562, + "step": 14216 + }, + { + "epoch": 0.9609977017709882, + "grad_norm": 0.2708432078361511, + "learning_rate": 1.2107473194992836e-07, + "loss": 0.05323028564453125, + "step": 14217 + }, + { + "epoch": 0.9610652967419224, + "grad_norm": 0.9239545464515686, + "learning_rate": 1.2065671271794754e-07, + "loss": 0.198944091796875, + "step": 14218 + }, + { + "epoch": 0.9611328917128565, + "grad_norm": 0.3816787004470825, + "learning_rate": 1.202394134411272e-07, + "loss": 0.056854248046875, + "step": 14219 + }, + { + "epoch": 0.9612004866837908, + "grad_norm": 1.1043978929519653, + "learning_rate": 1.1982283413965957e-07, + "loss": 0.13702392578125, + "step": 14220 + }, + { + "epoch": 0.9612680816547249, + "grad_norm": 0.9310930967330933, + "learning_rate": 1.1940697483370344e-07, + "loss": 0.12998199462890625, + "step": 14221 + }, + { + "epoch": 0.961335676625659, + "grad_norm": 0.7036914825439453, + "learning_rate": 1.1899183554337945e-07, + "loss": 0.14315032958984375, + "step": 14222 + }, + { + "epoch": 0.9614032715965932, + "grad_norm": 0.8937991857528687, + "learning_rate": 1.1857741628877316e-07, + "loss": 0.15616607666015625, + "step": 14223 + }, + { + "epoch": 0.9614708665675273, + "grad_norm": 1.074766755104065, + "learning_rate": 1.1816371708993856e-07, + "loss": 0.1286468505859375, + "step": 14224 + }, + { + "epoch": 0.9615384615384616, + "grad_norm": 0.5141580700874329, + "learning_rate": 1.1775073796689295e-07, + "loss": 0.1085357666015625, + "step": 14225 + }, + { + "epoch": 0.9616060565093957, + "grad_norm": 1.0344759225845337, + "learning_rate": 1.1733847893961703e-07, + "loss": 0.12492179870605469, + "step": 14226 + }, + { + "epoch": 0.9616736514803299, + "grad_norm": 0.24145516753196716, + "learning_rate": 1.1692694002806148e-07, + "loss": 0.0373992919921875, + "step": 14227 + }, + { + "epoch": 0.961741246451264, + "grad_norm": 0.28082576394081116, + "learning_rate": 1.165161212521354e-07, + "loss": 0.02759838104248047, + "step": 14228 + }, + { + "epoch": 0.9618088414221981, + "grad_norm": 1.4433587789535522, + "learning_rate": 1.161060226317212e-07, + "loss": 0.17903900146484375, + "step": 14229 + }, + { + "epoch": 0.9618764363931324, + "grad_norm": 1.8891798257827759, + "learning_rate": 1.1569664418665804e-07, + "loss": 0.1419219970703125, + "step": 14230 + }, + { + "epoch": 0.9619440313640665, + "grad_norm": 0.24069347977638245, + "learning_rate": 1.1528798593675505e-07, + "loss": 0.0378570556640625, + "step": 14231 + }, + { + "epoch": 0.9620116263350007, + "grad_norm": 0.21022246778011322, + "learning_rate": 1.1488004790178807e-07, + "loss": 0.04782867431640625, + "step": 14232 + }, + { + "epoch": 0.9620792213059348, + "grad_norm": 0.32661500573158264, + "learning_rate": 1.1447283010149301e-07, + "loss": 0.06734466552734375, + "step": 14233 + }, + { + "epoch": 0.962146816276869, + "grad_norm": 0.9489731788635254, + "learning_rate": 1.1406633255557408e-07, + "loss": 0.1320343017578125, + "step": 14234 + }, + { + "epoch": 0.9622144112478032, + "grad_norm": 0.5789251923561096, + "learning_rate": 1.1366055528370223e-07, + "loss": 0.09956741333007812, + "step": 14235 + }, + { + "epoch": 0.9622820062187373, + "grad_norm": 0.598685622215271, + "learning_rate": 1.1325549830550841e-07, + "loss": 0.0983428955078125, + "step": 14236 + }, + { + "epoch": 0.9623496011896715, + "grad_norm": 1.1156227588653564, + "learning_rate": 1.1285116164059361e-07, + "loss": 0.1790618896484375, + "step": 14237 + }, + { + "epoch": 0.9624171961606056, + "grad_norm": 0.8395194411277771, + "learning_rate": 1.1244754530852219e-07, + "loss": 0.1485595703125, + "step": 14238 + }, + { + "epoch": 0.9624847911315398, + "grad_norm": 0.4260661005973816, + "learning_rate": 1.1204464932882186e-07, + "loss": 0.076446533203125, + "step": 14239 + }, + { + "epoch": 0.962552386102474, + "grad_norm": 0.29725950956344604, + "learning_rate": 1.1164247372099035e-07, + "loss": 0.05084228515625, + "step": 14240 + }, + { + "epoch": 0.9626199810734082, + "grad_norm": 1.2114231586456299, + "learning_rate": 1.1124101850448377e-07, + "loss": 0.209259033203125, + "step": 14241 + }, + { + "epoch": 0.9626875760443423, + "grad_norm": 0.29553067684173584, + "learning_rate": 1.1084028369873156e-07, + "loss": 0.0495758056640625, + "step": 14242 + }, + { + "epoch": 0.9627551710152764, + "grad_norm": 0.80079585313797, + "learning_rate": 1.1044026932311824e-07, + "loss": 0.159515380859375, + "step": 14243 + }, + { + "epoch": 0.9628227659862106, + "grad_norm": 0.37927332520484924, + "learning_rate": 1.1004097539700331e-07, + "loss": 0.0671539306640625, + "step": 14244 + }, + { + "epoch": 0.9628903609571448, + "grad_norm": 0.8737064599990845, + "learning_rate": 1.0964240193970631e-07, + "loss": 0.22540283203125, + "step": 14245 + }, + { + "epoch": 0.962957955928079, + "grad_norm": 0.839176595211029, + "learning_rate": 1.0924454897051183e-07, + "loss": 0.10501861572265625, + "step": 14246 + }, + { + "epoch": 0.9630255508990131, + "grad_norm": 0.697907567024231, + "learning_rate": 1.0884741650866947e-07, + "loss": 0.148101806640625, + "step": 14247 + }, + { + "epoch": 0.9630931458699473, + "grad_norm": 0.3543126881122589, + "learning_rate": 1.0845100457339718e-07, + "loss": 0.0470123291015625, + "step": 14248 + }, + { + "epoch": 0.9631607408408814, + "grad_norm": 0.9413689970970154, + "learning_rate": 1.0805531318387462e-07, + "loss": 0.187530517578125, + "step": 14249 + }, + { + "epoch": 0.9632283358118156, + "grad_norm": 1.5046789646148682, + "learning_rate": 1.0766034235924815e-07, + "loss": 0.201690673828125, + "step": 14250 + }, + { + "epoch": 0.9632959307827498, + "grad_norm": 0.22965162992477417, + "learning_rate": 1.0726609211862914e-07, + "loss": 0.0481719970703125, + "step": 14251 + }, + { + "epoch": 0.9633635257536839, + "grad_norm": 0.419319212436676, + "learning_rate": 1.0687256248109234e-07, + "loss": 0.07328414916992188, + "step": 14252 + }, + { + "epoch": 0.9634311207246181, + "grad_norm": 1.8060259819030762, + "learning_rate": 1.0647975346568084e-07, + "loss": 0.2193756103515625, + "step": 14253 + }, + { + "epoch": 0.9634987156955522, + "grad_norm": 0.8229827880859375, + "learning_rate": 1.0608766509140111e-07, + "loss": 0.15875244140625, + "step": 14254 + }, + { + "epoch": 0.9635663106664865, + "grad_norm": 1.5408390760421753, + "learning_rate": 1.0569629737722297e-07, + "loss": 0.2227630615234375, + "step": 14255 + }, + { + "epoch": 0.9636339056374206, + "grad_norm": 1.3910963535308838, + "learning_rate": 1.0530565034208628e-07, + "loss": 0.196075439453125, + "step": 14256 + }, + { + "epoch": 0.9637015006083547, + "grad_norm": 1.0533066987991333, + "learning_rate": 1.0491572400489092e-07, + "loss": 0.135498046875, + "step": 14257 + }, + { + "epoch": 0.9637690955792889, + "grad_norm": 0.4253518283367157, + "learning_rate": 1.045265183845051e-07, + "loss": 0.0620269775390625, + "step": 14258 + }, + { + "epoch": 0.963836690550223, + "grad_norm": 0.20547489821910858, + "learning_rate": 1.0413803349975881e-07, + "loss": 0.031463623046875, + "step": 14259 + }, + { + "epoch": 0.9639042855211573, + "grad_norm": 0.5576124787330627, + "learning_rate": 1.0375026936945198e-07, + "loss": 0.06367874145507812, + "step": 14260 + }, + { + "epoch": 0.9639718804920914, + "grad_norm": 0.9080888628959656, + "learning_rate": 1.033632260123446e-07, + "loss": 0.1286773681640625, + "step": 14261 + }, + { + "epoch": 0.9640394754630256, + "grad_norm": 0.6801220774650574, + "learning_rate": 1.0297690344716671e-07, + "loss": 0.12381744384765625, + "step": 14262 + }, + { + "epoch": 0.9641070704339597, + "grad_norm": 0.3077068626880646, + "learning_rate": 1.0259130169261e-07, + "loss": 0.027530193328857422, + "step": 14263 + }, + { + "epoch": 0.9641746654048938, + "grad_norm": 1.0405137538909912, + "learning_rate": 1.0220642076733122e-07, + "loss": 0.186248779296875, + "step": 14264 + }, + { + "epoch": 0.964242260375828, + "grad_norm": 0.670284628868103, + "learning_rate": 1.0182226068995381e-07, + "loss": 0.09568023681640625, + "step": 14265 + }, + { + "epoch": 0.9643098553467622, + "grad_norm": 0.7546709775924683, + "learning_rate": 1.0143882147906791e-07, + "loss": 0.0802764892578125, + "step": 14266 + }, + { + "epoch": 0.9643774503176964, + "grad_norm": 0.6429983973503113, + "learning_rate": 1.0105610315322367e-07, + "loss": 0.1248016357421875, + "step": 14267 + }, + { + "epoch": 0.9644450452886305, + "grad_norm": 0.5169079899787903, + "learning_rate": 1.0067410573094127e-07, + "loss": 0.05982208251953125, + "step": 14268 + }, + { + "epoch": 0.9645126402595647, + "grad_norm": 0.29493531584739685, + "learning_rate": 1.002928292307026e-07, + "loss": 0.04097747802734375, + "step": 14269 + }, + { + "epoch": 0.9645802352304989, + "grad_norm": 0.3660167455673218, + "learning_rate": 9.99122736709579e-08, + "loss": 0.05535125732421875, + "step": 14270 + }, + { + "epoch": 0.964647830201433, + "grad_norm": 0.48369261622428894, + "learning_rate": 9.953243907012077e-08, + "loss": 0.09481048583984375, + "step": 14271 + }, + { + "epoch": 0.9647154251723672, + "grad_norm": 1.809299111366272, + "learning_rate": 9.915332544656819e-08, + "loss": 0.17657470703125, + "step": 14272 + }, + { + "epoch": 0.9647830201433013, + "grad_norm": 0.2742532193660736, + "learning_rate": 9.877493281864547e-08, + "loss": 0.0282135009765625, + "step": 14273 + }, + { + "epoch": 0.9648506151142355, + "grad_norm": 0.4120151698589325, + "learning_rate": 9.83972612046613e-08, + "loss": 0.08522796630859375, + "step": 14274 + }, + { + "epoch": 0.9649182100851696, + "grad_norm": 0.5802560448646545, + "learning_rate": 9.802031062288941e-08, + "loss": 0.10506057739257812, + "step": 14275 + }, + { + "epoch": 0.9649858050561039, + "grad_norm": 0.9964219331741333, + "learning_rate": 9.764408109156852e-08, + "loss": 0.10682296752929688, + "step": 14276 + }, + { + "epoch": 0.965053400027038, + "grad_norm": 1.256965160369873, + "learning_rate": 9.726857262890576e-08, + "loss": 0.1385498046875, + "step": 14277 + }, + { + "epoch": 0.9651209949979721, + "grad_norm": 0.8123780488967896, + "learning_rate": 9.689378525306659e-08, + "loss": 0.151153564453125, + "step": 14278 + }, + { + "epoch": 0.9651885899689063, + "grad_norm": 0.8117684721946716, + "learning_rate": 9.651971898218815e-08, + "loss": 0.11237335205078125, + "step": 14279 + }, + { + "epoch": 0.9652561849398404, + "grad_norm": 0.3608689606189728, + "learning_rate": 9.614637383436931e-08, + "loss": 0.07431793212890625, + "step": 14280 + }, + { + "epoch": 0.9653237799107747, + "grad_norm": 0.6019473671913147, + "learning_rate": 9.577374982767562e-08, + "loss": 0.08042144775390625, + "step": 14281 + }, + { + "epoch": 0.9653913748817088, + "grad_norm": 0.7966582775115967, + "learning_rate": 9.540184698013766e-08, + "loss": 0.1341094970703125, + "step": 14282 + }, + { + "epoch": 0.965458969852643, + "grad_norm": 0.4537815749645233, + "learning_rate": 9.503066530974603e-08, + "loss": 0.059047698974609375, + "step": 14283 + }, + { + "epoch": 0.9655265648235771, + "grad_norm": 1.3933236598968506, + "learning_rate": 9.466020483446469e-08, + "loss": 0.24786376953125, + "step": 14284 + }, + { + "epoch": 0.9655941597945112, + "grad_norm": 0.6993690729141235, + "learning_rate": 9.429046557221931e-08, + "loss": 0.12883758544921875, + "step": 14285 + }, + { + "epoch": 0.9656617547654455, + "grad_norm": 0.7771390080451965, + "learning_rate": 9.392144754089726e-08, + "loss": 0.19384765625, + "step": 14286 + }, + { + "epoch": 0.9657293497363796, + "grad_norm": 0.9530540108680725, + "learning_rate": 9.355315075835591e-08, + "loss": 0.1640472412109375, + "step": 14287 + }, + { + "epoch": 0.9657969447073138, + "grad_norm": 0.6456061601638794, + "learning_rate": 9.3185575242416e-08, + "loss": 0.10297012329101562, + "step": 14288 + }, + { + "epoch": 0.9658645396782479, + "grad_norm": 0.19048818945884705, + "learning_rate": 9.281872101086164e-08, + "loss": 0.03199005126953125, + "step": 14289 + }, + { + "epoch": 0.9659321346491822, + "grad_norm": 0.7271285653114319, + "learning_rate": 9.24525880814453e-08, + "loss": 0.12738037109375, + "step": 14290 + }, + { + "epoch": 0.9659997296201163, + "grad_norm": 1.0351536273956299, + "learning_rate": 9.208717647188114e-08, + "loss": 0.129486083984375, + "step": 14291 + }, + { + "epoch": 0.9660673245910504, + "grad_norm": 0.3741903603076935, + "learning_rate": 9.172248619985169e-08, + "loss": 0.058048248291015625, + "step": 14292 + }, + { + "epoch": 0.9661349195619846, + "grad_norm": 0.9418779611587524, + "learning_rate": 9.135851728300116e-08, + "loss": 0.12213134765625, + "step": 14293 + }, + { + "epoch": 0.9662025145329187, + "grad_norm": 0.4743706285953522, + "learning_rate": 9.099526973894045e-08, + "loss": 0.0714263916015625, + "step": 14294 + }, + { + "epoch": 0.966270109503853, + "grad_norm": 0.5661564469337463, + "learning_rate": 9.063274358524721e-08, + "loss": 0.0927581787109375, + "step": 14295 + }, + { + "epoch": 0.9663377044747871, + "grad_norm": 0.7371262311935425, + "learning_rate": 9.027093883946235e-08, + "loss": 0.07018852233886719, + "step": 14296 + }, + { + "epoch": 0.9664052994457213, + "grad_norm": 0.5594843626022339, + "learning_rate": 8.990985551909192e-08, + "loss": 0.0845947265625, + "step": 14297 + }, + { + "epoch": 0.9664728944166554, + "grad_norm": 1.1238465309143066, + "learning_rate": 8.954949364160858e-08, + "loss": 0.180145263671875, + "step": 14298 + }, + { + "epoch": 0.9665404893875895, + "grad_norm": 0.5631547570228577, + "learning_rate": 8.918985322444673e-08, + "loss": 0.0912933349609375, + "step": 14299 + }, + { + "epoch": 0.9666080843585237, + "grad_norm": 0.31723037362098694, + "learning_rate": 8.88309342850091e-08, + "loss": 0.06777191162109375, + "step": 14300 + }, + { + "epoch": 0.9666756793294579, + "grad_norm": 0.7966693639755249, + "learning_rate": 8.847273684066348e-08, + "loss": 0.16413116455078125, + "step": 14301 + }, + { + "epoch": 0.9667432743003921, + "grad_norm": 0.41029661893844604, + "learning_rate": 8.811526090873933e-08, + "loss": 0.0916900634765625, + "step": 14302 + }, + { + "epoch": 0.9668108692713262, + "grad_norm": 0.39168769121170044, + "learning_rate": 8.775850650653616e-08, + "loss": 0.07221221923828125, + "step": 14303 + }, + { + "epoch": 0.9668784642422604, + "grad_norm": 0.19374433159828186, + "learning_rate": 8.740247365131349e-08, + "loss": 0.022985458374023438, + "step": 14304 + }, + { + "epoch": 0.9669460592131945, + "grad_norm": 0.6588008999824524, + "learning_rate": 8.704716236030086e-08, + "loss": 0.151336669921875, + "step": 14305 + }, + { + "epoch": 0.9670136541841287, + "grad_norm": 0.8500862717628479, + "learning_rate": 8.669257265068787e-08, + "loss": 0.1186065673828125, + "step": 14306 + }, + { + "epoch": 0.9670812491550629, + "grad_norm": 0.2090260088443756, + "learning_rate": 8.63387045396341e-08, + "loss": 0.025177001953125, + "step": 14307 + }, + { + "epoch": 0.967148844125997, + "grad_norm": 0.3858761787414551, + "learning_rate": 8.59855580442609e-08, + "loss": 0.05326080322265625, + "step": 14308 + }, + { + "epoch": 0.9672164390969312, + "grad_norm": 0.3201480805873871, + "learning_rate": 8.563313318165456e-08, + "loss": 0.05811309814453125, + "step": 14309 + }, + { + "epoch": 0.9672840340678653, + "grad_norm": 0.8665540218353271, + "learning_rate": 8.528142996886978e-08, + "loss": 0.10860061645507812, + "step": 14310 + }, + { + "epoch": 0.9673516290387996, + "grad_norm": 0.7173518538475037, + "learning_rate": 8.493044842292297e-08, + "loss": 0.160430908203125, + "step": 14311 + }, + { + "epoch": 0.9674192240097337, + "grad_norm": 0.3139011859893799, + "learning_rate": 8.458018856079553e-08, + "loss": 0.04505157470703125, + "step": 14312 + }, + { + "epoch": 0.9674868189806678, + "grad_norm": 0.3364429175853729, + "learning_rate": 8.423065039943723e-08, + "loss": 0.03369903564453125, + "step": 14313 + }, + { + "epoch": 0.967554413951602, + "grad_norm": 1.0622354745864868, + "learning_rate": 8.38818339557612e-08, + "loss": 0.212646484375, + "step": 14314 + }, + { + "epoch": 0.9676220089225361, + "grad_norm": 1.1712452173233032, + "learning_rate": 8.353373924664398e-08, + "loss": 0.189178466796875, + "step": 14315 + }, + { + "epoch": 0.9676896038934704, + "grad_norm": 1.0531766414642334, + "learning_rate": 8.318636628892873e-08, + "loss": 0.200958251953125, + "step": 14316 + }, + { + "epoch": 0.9677571988644045, + "grad_norm": 0.27701112627983093, + "learning_rate": 8.283971509942367e-08, + "loss": 0.04376983642578125, + "step": 14317 + }, + { + "epoch": 0.9678247938353387, + "grad_norm": 0.8162541389465332, + "learning_rate": 8.249378569490207e-08, + "loss": 0.146697998046875, + "step": 14318 + }, + { + "epoch": 0.9678923888062728, + "grad_norm": 0.26634782552719116, + "learning_rate": 8.214857809210219e-08, + "loss": 0.051361083984375, + "step": 14319 + }, + { + "epoch": 0.9679599837772069, + "grad_norm": 0.35718902945518494, + "learning_rate": 8.180409230772735e-08, + "loss": 0.06082916259765625, + "step": 14320 + }, + { + "epoch": 0.9680275787481412, + "grad_norm": 0.4870745539665222, + "learning_rate": 8.146032835844586e-08, + "loss": 0.077972412109375, + "step": 14321 + }, + { + "epoch": 0.9680951737190753, + "grad_norm": 1.1424857378005981, + "learning_rate": 8.111728626088943e-08, + "loss": 0.20318603515625, + "step": 14322 + }, + { + "epoch": 0.9681627686900095, + "grad_norm": 0.713074803352356, + "learning_rate": 8.077496603165979e-08, + "loss": 0.0970611572265625, + "step": 14323 + }, + { + "epoch": 0.9682303636609436, + "grad_norm": 0.5794278383255005, + "learning_rate": 8.043336768731868e-08, + "loss": 0.08094024658203125, + "step": 14324 + }, + { + "epoch": 0.9682979586318778, + "grad_norm": 0.2536674737930298, + "learning_rate": 8.009249124439621e-08, + "loss": 0.0490264892578125, + "step": 14325 + }, + { + "epoch": 0.968365553602812, + "grad_norm": 0.7184370160102844, + "learning_rate": 7.97523367193842e-08, + "loss": 0.07419013977050781, + "step": 14326 + }, + { + "epoch": 0.9684331485737461, + "grad_norm": 0.24900129437446594, + "learning_rate": 7.941290412874114e-08, + "loss": 0.039641380310058594, + "step": 14327 + }, + { + "epoch": 0.9685007435446803, + "grad_norm": 0.6698769927024841, + "learning_rate": 7.907419348889222e-08, + "loss": 0.14165496826171875, + "step": 14328 + }, + { + "epoch": 0.9685683385156144, + "grad_norm": 0.8012751936912537, + "learning_rate": 7.873620481622768e-08, + "loss": 0.11133956909179688, + "step": 14329 + }, + { + "epoch": 0.9686359334865486, + "grad_norm": 1.8683398962020874, + "learning_rate": 7.839893812709776e-08, + "loss": 0.186370849609375, + "step": 14330 + }, + { + "epoch": 0.9687035284574828, + "grad_norm": 0.33866527676582336, + "learning_rate": 7.806239343782439e-08, + "loss": 0.075958251953125, + "step": 14331 + }, + { + "epoch": 0.968771123428417, + "grad_norm": 0.5158811807632446, + "learning_rate": 7.772657076469125e-08, + "loss": 0.08452606201171875, + "step": 14332 + }, + { + "epoch": 0.9688387183993511, + "grad_norm": 0.2795328199863434, + "learning_rate": 7.739147012394699e-08, + "loss": 0.04315185546875, + "step": 14333 + }, + { + "epoch": 0.9689063133702852, + "grad_norm": 0.7688013315200806, + "learning_rate": 7.705709153180696e-08, + "loss": 0.131988525390625, + "step": 14334 + }, + { + "epoch": 0.9689739083412194, + "grad_norm": 1.1103184223175049, + "learning_rate": 7.672343500444823e-08, + "loss": 0.2389373779296875, + "step": 14335 + }, + { + "epoch": 0.9690415033121536, + "grad_norm": 0.48033642768859863, + "learning_rate": 7.639050055801788e-08, + "loss": 0.1068267822265625, + "step": 14336 + }, + { + "epoch": 0.9691090982830878, + "grad_norm": 1.1552352905273438, + "learning_rate": 7.605828820862304e-08, + "loss": 0.181732177734375, + "step": 14337 + }, + { + "epoch": 0.9691766932540219, + "grad_norm": 0.44097793102264404, + "learning_rate": 7.572679797233917e-08, + "loss": 0.06657791137695312, + "step": 14338 + }, + { + "epoch": 0.9692442882249561, + "grad_norm": 1.0435504913330078, + "learning_rate": 7.539602986520678e-08, + "loss": 0.07913780212402344, + "step": 14339 + }, + { + "epoch": 0.9693118831958902, + "grad_norm": 0.26661616563796997, + "learning_rate": 7.506598390322972e-08, + "loss": 0.034503936767578125, + "step": 14340 + }, + { + "epoch": 0.9693794781668243, + "grad_norm": 0.2953893840312958, + "learning_rate": 7.47366601023769e-08, + "loss": 0.0507354736328125, + "step": 14341 + }, + { + "epoch": 0.9694470731377586, + "grad_norm": 0.23046787083148956, + "learning_rate": 7.44080584785839e-08, + "loss": 0.03948211669921875, + "step": 14342 + }, + { + "epoch": 0.9695146681086927, + "grad_norm": 1.3491982221603394, + "learning_rate": 7.408017904774967e-08, + "loss": 0.183837890625, + "step": 14343 + }, + { + "epoch": 0.9695822630796269, + "grad_norm": 0.40497255325317383, + "learning_rate": 7.375302182573984e-08, + "loss": 0.087615966796875, + "step": 14344 + }, + { + "epoch": 0.969649858050561, + "grad_norm": 1.406672477722168, + "learning_rate": 7.34265868283851e-08, + "loss": 0.2154541015625, + "step": 14345 + }, + { + "epoch": 0.9697174530214951, + "grad_norm": 0.45941755175590515, + "learning_rate": 7.310087407147781e-08, + "loss": 0.0955352783203125, + "step": 14346 + }, + { + "epoch": 0.9697850479924294, + "grad_norm": 0.9302729368209839, + "learning_rate": 7.277588357078035e-08, + "loss": 0.160186767578125, + "step": 14347 + }, + { + "epoch": 0.9698526429633635, + "grad_norm": 0.5340638160705566, + "learning_rate": 7.245161534201683e-08, + "loss": 0.08615875244140625, + "step": 14348 + }, + { + "epoch": 0.9699202379342977, + "grad_norm": 0.8620736598968506, + "learning_rate": 7.212806940087802e-08, + "loss": 0.1716156005859375, + "step": 14349 + }, + { + "epoch": 0.9699878329052318, + "grad_norm": 0.2674214541912079, + "learning_rate": 7.180524576301972e-08, + "loss": 0.049747467041015625, + "step": 14350 + }, + { + "epoch": 0.970055427876166, + "grad_norm": 0.7706832885742188, + "learning_rate": 7.148314444405946e-08, + "loss": 0.08760261535644531, + "step": 14351 + }, + { + "epoch": 0.9701230228471002, + "grad_norm": 0.746189296245575, + "learning_rate": 7.116176545958474e-08, + "loss": 0.14507293701171875, + "step": 14352 + }, + { + "epoch": 0.9701906178180343, + "grad_norm": 0.4082292318344116, + "learning_rate": 7.084110882514484e-08, + "loss": 0.06371307373046875, + "step": 14353 + }, + { + "epoch": 0.9702582127889685, + "grad_norm": 0.21326878666877747, + "learning_rate": 7.052117455625562e-08, + "loss": 0.02545166015625, + "step": 14354 + }, + { + "epoch": 0.9703258077599026, + "grad_norm": 0.4115946888923645, + "learning_rate": 7.020196266839807e-08, + "loss": 0.076446533203125, + "step": 14355 + }, + { + "epoch": 0.9703934027308369, + "grad_norm": 0.5516557693481445, + "learning_rate": 6.988347317701649e-08, + "loss": 0.09401702880859375, + "step": 14356 + }, + { + "epoch": 0.970460997701771, + "grad_norm": 0.6320052742958069, + "learning_rate": 6.956570609752189e-08, + "loss": 0.0850067138671875, + "step": 14357 + }, + { + "epoch": 0.9705285926727052, + "grad_norm": 0.26660364866256714, + "learning_rate": 6.924866144529029e-08, + "loss": 0.031032562255859375, + "step": 14358 + }, + { + "epoch": 0.9705961876436393, + "grad_norm": 1.1928648948669434, + "learning_rate": 6.89323392356611e-08, + "loss": 0.13942718505859375, + "step": 14359 + }, + { + "epoch": 0.9706637826145734, + "grad_norm": 0.29224881529808044, + "learning_rate": 6.861673948394043e-08, + "loss": 0.0523834228515625, + "step": 14360 + }, + { + "epoch": 0.9707313775855076, + "grad_norm": 0.2364080250263214, + "learning_rate": 6.830186220539936e-08, + "loss": 0.03395652770996094, + "step": 14361 + }, + { + "epoch": 0.9707989725564418, + "grad_norm": 0.9336182475090027, + "learning_rate": 6.798770741527404e-08, + "loss": 0.167083740234375, + "step": 14362 + }, + { + "epoch": 0.970866567527376, + "grad_norm": 0.3747061789035797, + "learning_rate": 6.767427512876568e-08, + "loss": 0.0665130615234375, + "step": 14363 + }, + { + "epoch": 0.9709341624983101, + "grad_norm": 0.7626398205757141, + "learning_rate": 6.73615653610371e-08, + "loss": 0.154296875, + "step": 14364 + }, + { + "epoch": 0.9710017574692443, + "grad_norm": 0.5218607783317566, + "learning_rate": 6.704957812722124e-08, + "loss": 0.0797119140625, + "step": 14365 + }, + { + "epoch": 0.9710693524401784, + "grad_norm": 0.26487720012664795, + "learning_rate": 6.673831344241432e-08, + "loss": 0.0437164306640625, + "step": 14366 + }, + { + "epoch": 0.9711369474111126, + "grad_norm": 0.8012953996658325, + "learning_rate": 6.642777132167766e-08, + "loss": 0.140899658203125, + "step": 14367 + }, + { + "epoch": 0.9712045423820468, + "grad_norm": 0.38442543148994446, + "learning_rate": 6.611795178003421e-08, + "loss": 0.057865142822265625, + "step": 14368 + }, + { + "epoch": 0.9712721373529809, + "grad_norm": 0.9394755959510803, + "learning_rate": 6.580885483247867e-08, + "loss": 0.12407684326171875, + "step": 14369 + }, + { + "epoch": 0.9713397323239151, + "grad_norm": 1.0563665628433228, + "learning_rate": 6.550048049396406e-08, + "loss": 0.183807373046875, + "step": 14370 + }, + { + "epoch": 0.9714073272948492, + "grad_norm": 0.44861236214637756, + "learning_rate": 6.519282877941512e-08, + "loss": 0.08673095703125, + "step": 14371 + }, + { + "epoch": 0.9714749222657835, + "grad_norm": 1.5660290718078613, + "learning_rate": 6.488589970371329e-08, + "loss": 0.20477294921875, + "step": 14372 + }, + { + "epoch": 0.9715425172367176, + "grad_norm": 0.5943796038627625, + "learning_rate": 6.457969328171331e-08, + "loss": 0.0926361083984375, + "step": 14373 + }, + { + "epoch": 0.9716101122076517, + "grad_norm": 0.5042708516120911, + "learning_rate": 6.427420952823005e-08, + "loss": 0.08855056762695312, + "step": 14374 + }, + { + "epoch": 0.9716777071785859, + "grad_norm": 0.6486161947250366, + "learning_rate": 6.396944845804497e-08, + "loss": 0.08137893676757812, + "step": 14375 + }, + { + "epoch": 0.97174530214952, + "grad_norm": 0.4499227702617645, + "learning_rate": 6.366541008590465e-08, + "loss": 0.08663177490234375, + "step": 14376 + }, + { + "epoch": 0.9718128971204543, + "grad_norm": 1.3983789682388306, + "learning_rate": 6.336209442651897e-08, + "loss": 0.18267822265625, + "step": 14377 + }, + { + "epoch": 0.9718804920913884, + "grad_norm": 1.0022650957107544, + "learning_rate": 6.305950149456453e-08, + "loss": 0.13414382934570312, + "step": 14378 + }, + { + "epoch": 0.9719480870623226, + "grad_norm": 0.27090883255004883, + "learning_rate": 6.275763130468459e-08, + "loss": 0.040554046630859375, + "step": 14379 + }, + { + "epoch": 0.9720156820332567, + "grad_norm": 0.37996989488601685, + "learning_rate": 6.245648387148417e-08, + "loss": 0.07880401611328125, + "step": 14380 + }, + { + "epoch": 0.9720832770041908, + "grad_norm": 0.6074498891830444, + "learning_rate": 6.215605920953327e-08, + "loss": 0.106109619140625, + "step": 14381 + }, + { + "epoch": 0.9721508719751251, + "grad_norm": 0.5672016739845276, + "learning_rate": 6.185635733337024e-08, + "loss": 0.0992584228515625, + "step": 14382 + }, + { + "epoch": 0.9722184669460592, + "grad_norm": 0.9964084625244141, + "learning_rate": 6.155737825749686e-08, + "loss": 0.1196746826171875, + "step": 14383 + }, + { + "epoch": 0.9722860619169934, + "grad_norm": 0.2740556001663208, + "learning_rate": 6.125912199637818e-08, + "loss": 0.036174774169921875, + "step": 14384 + }, + { + "epoch": 0.9723536568879275, + "grad_norm": 0.4090825915336609, + "learning_rate": 6.0961588564446e-08, + "loss": 0.06438446044921875, + "step": 14385 + }, + { + "epoch": 0.9724212518588617, + "grad_norm": 0.2105431854724884, + "learning_rate": 6.066477797609715e-08, + "loss": 0.03137493133544922, + "step": 14386 + }, + { + "epoch": 0.9724888468297959, + "grad_norm": 0.3244228959083557, + "learning_rate": 6.036869024569346e-08, + "loss": 0.0568695068359375, + "step": 14387 + }, + { + "epoch": 0.97255644180073, + "grad_norm": 0.28224921226501465, + "learning_rate": 6.007332538756016e-08, + "loss": 0.05377197265625, + "step": 14388 + }, + { + "epoch": 0.9726240367716642, + "grad_norm": 0.3640567362308502, + "learning_rate": 5.977868341598913e-08, + "loss": 0.0594482421875, + "step": 14389 + }, + { + "epoch": 0.9726916317425983, + "grad_norm": 0.44394856691360474, + "learning_rate": 5.948476434523897e-08, + "loss": 0.04254150390625, + "step": 14390 + }, + { + "epoch": 0.9727592267135325, + "grad_norm": 0.2663930356502533, + "learning_rate": 5.9191568189529974e-08, + "loss": 0.03412628173828125, + "step": 14391 + }, + { + "epoch": 0.9728268216844667, + "grad_norm": 0.4914613366127014, + "learning_rate": 5.8899094963049125e-08, + "loss": 0.0877227783203125, + "step": 14392 + }, + { + "epoch": 0.9728944166554009, + "grad_norm": 1.0341094732284546, + "learning_rate": 5.860734467994677e-08, + "loss": 0.133209228515625, + "step": 14393 + }, + { + "epoch": 0.972962011626335, + "grad_norm": 0.842403769493103, + "learning_rate": 5.831631735434162e-08, + "loss": 0.14575958251953125, + "step": 14394 + }, + { + "epoch": 0.9730296065972691, + "grad_norm": 1.0683622360229492, + "learning_rate": 5.802601300031407e-08, + "loss": 0.12022256851196289, + "step": 14395 + }, + { + "epoch": 0.9730972015682033, + "grad_norm": 0.19934208691120148, + "learning_rate": 5.7736431631911225e-08, + "loss": 0.021869182586669922, + "step": 14396 + }, + { + "epoch": 0.9731647965391375, + "grad_norm": 1.4550069570541382, + "learning_rate": 5.7447573263143536e-08, + "loss": 0.1537322998046875, + "step": 14397 + }, + { + "epoch": 0.9732323915100717, + "grad_norm": 0.6309389472007751, + "learning_rate": 5.71594379079915e-08, + "loss": 0.08197021484375, + "step": 14398 + }, + { + "epoch": 0.9732999864810058, + "grad_norm": 0.5331107378005981, + "learning_rate": 5.687202558039228e-08, + "loss": 0.07458877563476562, + "step": 14399 + }, + { + "epoch": 0.97336758145194, + "grad_norm": 0.35501617193222046, + "learning_rate": 5.6585336294254774e-08, + "loss": 0.052947998046875, + "step": 14400 + }, + { + "epoch": 0.9734351764228741, + "grad_norm": 0.9273277521133423, + "learning_rate": 5.6299370063451204e-08, + "loss": 0.12727737426757812, + "step": 14401 + }, + { + "epoch": 0.9735027713938083, + "grad_norm": 0.5042507648468018, + "learning_rate": 5.601412690181884e-08, + "loss": 0.0701751708984375, + "step": 14402 + }, + { + "epoch": 0.9735703663647425, + "grad_norm": 0.8663356304168701, + "learning_rate": 5.572960682315664e-08, + "loss": 0.179534912109375, + "step": 14403 + }, + { + "epoch": 0.9736379613356766, + "grad_norm": 1.008931279182434, + "learning_rate": 5.544580984123526e-08, + "loss": 0.1327667236328125, + "step": 14404 + }, + { + "epoch": 0.9737055563066108, + "grad_norm": 0.7974595427513123, + "learning_rate": 5.5162735969785386e-08, + "loss": 0.087799072265625, + "step": 14405 + }, + { + "epoch": 0.9737731512775449, + "grad_norm": 0.4695480465888977, + "learning_rate": 5.488038522250105e-08, + "loss": 0.0735931396484375, + "step": 14406 + }, + { + "epoch": 0.9738407462484792, + "grad_norm": 0.6147263646125793, + "learning_rate": 5.4598757613048e-08, + "loss": 0.10425186157226562, + "step": 14407 + }, + { + "epoch": 0.9739083412194133, + "grad_norm": 0.22291217744350433, + "learning_rate": 5.4317853155052e-08, + "loss": 0.030609130859375, + "step": 14408 + }, + { + "epoch": 0.9739759361903474, + "grad_norm": 0.667458176612854, + "learning_rate": 5.403767186210218e-08, + "loss": 0.11266326904296875, + "step": 14409 + }, + { + "epoch": 0.9740435311612816, + "grad_norm": 0.27272436022758484, + "learning_rate": 5.375821374775936e-08, + "loss": 0.05066680908203125, + "step": 14410 + }, + { + "epoch": 0.9741111261322157, + "grad_norm": 0.7647687792778015, + "learning_rate": 5.347947882554438e-08, + "loss": 0.10573577880859375, + "step": 14411 + }, + { + "epoch": 0.97417872110315, + "grad_norm": 0.5108162760734558, + "learning_rate": 5.320146710894147e-08, + "loss": 0.08595085144042969, + "step": 14412 + }, + { + "epoch": 0.9742463160740841, + "grad_norm": 0.5218163132667542, + "learning_rate": 5.292417861140653e-08, + "loss": 0.088836669921875, + "step": 14413 + }, + { + "epoch": 0.9743139110450183, + "grad_norm": 0.49350568652153015, + "learning_rate": 5.264761334635382e-08, + "loss": 0.08415985107421875, + "step": 14414 + }, + { + "epoch": 0.9743815060159524, + "grad_norm": 0.36248907446861267, + "learning_rate": 5.237177132716764e-08, + "loss": 0.0377349853515625, + "step": 14415 + }, + { + "epoch": 0.9744491009868865, + "grad_norm": 0.3700915277004242, + "learning_rate": 5.2096652567192315e-08, + "loss": 0.077850341796875, + "step": 14416 + }, + { + "epoch": 0.9745166959578208, + "grad_norm": 0.24013133347034454, + "learning_rate": 5.1822257079740534e-08, + "loss": 0.029224395751953125, + "step": 14417 + }, + { + "epoch": 0.9745842909287549, + "grad_norm": 0.5151985883712769, + "learning_rate": 5.154858487809e-08, + "loss": 0.06345367431640625, + "step": 14418 + }, + { + "epoch": 0.9746518858996891, + "grad_norm": 1.369657278060913, + "learning_rate": 5.127563597548346e-08, + "loss": 0.15053558349609375, + "step": 14419 + }, + { + "epoch": 0.9747194808706232, + "grad_norm": 0.497224360704422, + "learning_rate": 5.100341038512535e-08, + "loss": 0.08283233642578125, + "step": 14420 + }, + { + "epoch": 0.9747870758415574, + "grad_norm": 1.4455623626708984, + "learning_rate": 5.073190812019013e-08, + "loss": 0.1110382080078125, + "step": 14421 + }, + { + "epoch": 0.9748546708124916, + "grad_norm": 0.3656282126903534, + "learning_rate": 5.046112919381229e-08, + "loss": 0.046718597412109375, + "step": 14422 + }, + { + "epoch": 0.9749222657834257, + "grad_norm": 1.015735387802124, + "learning_rate": 5.0191073619096364e-08, + "loss": 0.14483642578125, + "step": 14423 + }, + { + "epoch": 0.9749898607543599, + "grad_norm": 0.4621499180793762, + "learning_rate": 4.9921741409108545e-08, + "loss": 0.0809478759765625, + "step": 14424 + }, + { + "epoch": 0.975057455725294, + "grad_norm": 1.2832895517349243, + "learning_rate": 4.965313257687842e-08, + "loss": 0.1497039794921875, + "step": 14425 + }, + { + "epoch": 0.9751250506962282, + "grad_norm": 0.5313066244125366, + "learning_rate": 4.938524713540726e-08, + "loss": 0.111785888671875, + "step": 14426 + }, + { + "epoch": 0.9751926456671623, + "grad_norm": 1.443772792816162, + "learning_rate": 4.911808509765303e-08, + "loss": 0.1703033447265625, + "step": 14427 + }, + { + "epoch": 0.9752602406380966, + "grad_norm": 0.8833717107772827, + "learning_rate": 4.885164647654539e-08, + "loss": 0.10101318359375, + "step": 14428 + }, + { + "epoch": 0.9753278356090307, + "grad_norm": 0.45956867933273315, + "learning_rate": 4.8585931284974015e-08, + "loss": 0.080535888671875, + "step": 14429 + }, + { + "epoch": 0.9753954305799648, + "grad_norm": 0.21400392055511475, + "learning_rate": 4.8320939535798635e-08, + "loss": 0.0289764404296875, + "step": 14430 + }, + { + "epoch": 0.975463025550899, + "grad_norm": 0.4699811637401581, + "learning_rate": 4.8056671241838986e-08, + "loss": 0.08135986328125, + "step": 14431 + }, + { + "epoch": 0.9755306205218331, + "grad_norm": 0.15171387791633606, + "learning_rate": 4.779312641588318e-08, + "loss": 0.01811504364013672, + "step": 14432 + }, + { + "epoch": 0.9755982154927674, + "grad_norm": 0.9363105297088623, + "learning_rate": 4.753030507068268e-08, + "loss": 0.1404876708984375, + "step": 14433 + }, + { + "epoch": 0.9756658104637015, + "grad_norm": 0.23950302600860596, + "learning_rate": 4.7268207218953974e-08, + "loss": 0.036041259765625, + "step": 14434 + }, + { + "epoch": 0.9757334054346357, + "grad_norm": 0.23492221534252167, + "learning_rate": 4.7006832873380256e-08, + "loss": 0.0435791015625, + "step": 14435 + }, + { + "epoch": 0.9758010004055698, + "grad_norm": 0.4307892918586731, + "learning_rate": 4.674618204660641e-08, + "loss": 0.06848907470703125, + "step": 14436 + }, + { + "epoch": 0.9758685953765039, + "grad_norm": 0.4814366102218628, + "learning_rate": 4.648625475124568e-08, + "loss": 0.0875244140625, + "step": 14437 + }, + { + "epoch": 0.9759361903474382, + "grad_norm": 0.5351689457893372, + "learning_rate": 4.622705099987467e-08, + "loss": 0.08034896850585938, + "step": 14438 + }, + { + "epoch": 0.9760037853183723, + "grad_norm": 1.4485433101654053, + "learning_rate": 4.596857080503669e-08, + "loss": 0.1779022216796875, + "step": 14439 + }, + { + "epoch": 0.9760713802893065, + "grad_norm": 1.1001667976379395, + "learning_rate": 4.571081417923673e-08, + "loss": 0.1299304962158203, + "step": 14440 + }, + { + "epoch": 0.9761389752602406, + "grad_norm": 0.4751240909099579, + "learning_rate": 4.5453781134948135e-08, + "loss": 0.082611083984375, + "step": 14441 + }, + { + "epoch": 0.9762065702311749, + "grad_norm": 1.1165597438812256, + "learning_rate": 4.5197471684605974e-08, + "loss": 0.11913299560546875, + "step": 14442 + }, + { + "epoch": 0.976274165202109, + "grad_norm": 1.6501461267471313, + "learning_rate": 4.494188584061365e-08, + "loss": 0.180450439453125, + "step": 14443 + }, + { + "epoch": 0.9763417601730431, + "grad_norm": 1.2147560119628906, + "learning_rate": 4.4687023615336275e-08, + "loss": 0.20001220703125, + "step": 14444 + }, + { + "epoch": 0.9764093551439773, + "grad_norm": 0.5399879217147827, + "learning_rate": 4.443288502110732e-08, + "loss": 0.0917510986328125, + "step": 14445 + }, + { + "epoch": 0.9764769501149114, + "grad_norm": 1.4557709693908691, + "learning_rate": 4.41794700702236e-08, + "loss": 0.1003570556640625, + "step": 14446 + }, + { + "epoch": 0.9765445450858456, + "grad_norm": 0.8935292363166809, + "learning_rate": 4.392677877494533e-08, + "loss": 0.193634033203125, + "step": 14447 + }, + { + "epoch": 0.9766121400567798, + "grad_norm": 0.4465181827545166, + "learning_rate": 4.367481114750105e-08, + "loss": 0.07997894287109375, + "step": 14448 + }, + { + "epoch": 0.976679735027714, + "grad_norm": 1.1423643827438354, + "learning_rate": 4.3423567200081004e-08, + "loss": 0.209716796875, + "step": 14449 + }, + { + "epoch": 0.9767473299986481, + "grad_norm": 0.6861609220504761, + "learning_rate": 4.3173046944843806e-08, + "loss": 0.1225433349609375, + "step": 14450 + }, + { + "epoch": 0.9768149249695822, + "grad_norm": 0.41674861311912537, + "learning_rate": 4.292325039390976e-08, + "loss": 0.0911102294921875, + "step": 14451 + }, + { + "epoch": 0.9768825199405164, + "grad_norm": 0.8337565660476685, + "learning_rate": 4.267417755936587e-08, + "loss": 0.10369110107421875, + "step": 14452 + }, + { + "epoch": 0.9769501149114506, + "grad_norm": 0.40832260251045227, + "learning_rate": 4.242582845326415e-08, + "loss": 0.05359649658203125, + "step": 14453 + }, + { + "epoch": 0.9770177098823848, + "grad_norm": 0.754943311214447, + "learning_rate": 4.217820308762166e-08, + "loss": 0.11638641357421875, + "step": 14454 + }, + { + "epoch": 0.9770853048533189, + "grad_norm": 0.5132562518119812, + "learning_rate": 4.193130147441715e-08, + "loss": 0.07398796081542969, + "step": 14455 + }, + { + "epoch": 0.9771528998242531, + "grad_norm": 0.976094126701355, + "learning_rate": 4.168512362560106e-08, + "loss": 0.15038299560546875, + "step": 14456 + }, + { + "epoch": 0.9772204947951872, + "grad_norm": 0.37436291575431824, + "learning_rate": 4.143966955308387e-08, + "loss": 0.073486328125, + "step": 14457 + }, + { + "epoch": 0.9772880897661214, + "grad_norm": 0.38448822498321533, + "learning_rate": 4.119493926874107e-08, + "loss": 0.05889701843261719, + "step": 14458 + }, + { + "epoch": 0.9773556847370556, + "grad_norm": 0.2524266839027405, + "learning_rate": 4.0950932784414864e-08, + "loss": 0.03516387939453125, + "step": 14459 + }, + { + "epoch": 0.9774232797079897, + "grad_norm": 0.3011537194252014, + "learning_rate": 4.0707650111912465e-08, + "loss": 0.046779632568359375, + "step": 14460 + }, + { + "epoch": 0.9774908746789239, + "grad_norm": 0.9060578346252441, + "learning_rate": 4.046509126300446e-08, + "loss": 0.10696792602539062, + "step": 14461 + }, + { + "epoch": 0.977558469649858, + "grad_norm": 0.38896819949150085, + "learning_rate": 4.0223256249426465e-08, + "loss": 0.0689544677734375, + "step": 14462 + }, + { + "epoch": 0.9776260646207923, + "grad_norm": 0.3781201243400574, + "learning_rate": 3.998214508288245e-08, + "loss": 0.07726287841796875, + "step": 14463 + }, + { + "epoch": 0.9776936595917264, + "grad_norm": 0.22872181236743927, + "learning_rate": 3.9741757775038076e-08, + "loss": 0.03280305862426758, + "step": 14464 + }, + { + "epoch": 0.9777612545626605, + "grad_norm": 0.40542155504226685, + "learning_rate": 3.950209433752405e-08, + "loss": 0.0706024169921875, + "step": 14465 + }, + { + "epoch": 0.9778288495335947, + "grad_norm": 0.4192669987678528, + "learning_rate": 3.92631547819361e-08, + "loss": 0.05272674560546875, + "step": 14466 + }, + { + "epoch": 0.9778964445045288, + "grad_norm": 0.9709809422492981, + "learning_rate": 3.9024939119836624e-08, + "loss": 0.146881103515625, + "step": 14467 + }, + { + "epoch": 0.9779640394754631, + "grad_norm": 0.5470283031463623, + "learning_rate": 3.878744736275308e-08, + "loss": 0.116485595703125, + "step": 14468 + }, + { + "epoch": 0.9780316344463972, + "grad_norm": 0.7196934223175049, + "learning_rate": 3.8550679522174615e-08, + "loss": 0.11179351806640625, + "step": 14469 + }, + { + "epoch": 0.9780992294173313, + "grad_norm": 0.7557161450386047, + "learning_rate": 3.8314635609558726e-08, + "loss": 0.11663818359375, + "step": 14470 + }, + { + "epoch": 0.9781668243882655, + "grad_norm": 0.7328963875770569, + "learning_rate": 3.807931563632627e-08, + "loss": 0.12359619140625, + "step": 14471 + }, + { + "epoch": 0.9782344193591996, + "grad_norm": 0.6090266704559326, + "learning_rate": 3.784471961386482e-08, + "loss": 0.0841827392578125, + "step": 14472 + }, + { + "epoch": 0.9783020143301339, + "grad_norm": 0.9253597259521484, + "learning_rate": 3.761084755352362e-08, + "loss": 0.168365478515625, + "step": 14473 + }, + { + "epoch": 0.978369609301068, + "grad_norm": 1.0077470541000366, + "learning_rate": 3.7377699466620285e-08, + "loss": 0.1426239013671875, + "step": 14474 + }, + { + "epoch": 0.9784372042720022, + "grad_norm": 0.6722844243049622, + "learning_rate": 3.714527536443413e-08, + "loss": 0.06458663940429688, + "step": 14475 + }, + { + "epoch": 0.9785047992429363, + "grad_norm": 0.8481786251068115, + "learning_rate": 3.6913575258212815e-08, + "loss": 0.131561279296875, + "step": 14476 + }, + { + "epoch": 0.9785723942138704, + "grad_norm": 0.9775532484054565, + "learning_rate": 3.6682599159167383e-08, + "loss": 0.119140625, + "step": 14477 + }, + { + "epoch": 0.9786399891848047, + "grad_norm": 0.7203758358955383, + "learning_rate": 3.645234707847389e-08, + "loss": 0.08674430847167969, + "step": 14478 + }, + { + "epoch": 0.9787075841557388, + "grad_norm": 1.1821577548980713, + "learning_rate": 3.6222819027273427e-08, + "loss": 0.15964508056640625, + "step": 14479 + }, + { + "epoch": 0.978775179126673, + "grad_norm": 0.2342018336057663, + "learning_rate": 3.59940150166721e-08, + "loss": 0.02771759033203125, + "step": 14480 + }, + { + "epoch": 0.9788427740976071, + "grad_norm": 0.8065903782844543, + "learning_rate": 3.57659350577394e-08, + "loss": 0.117706298828125, + "step": 14481 + }, + { + "epoch": 0.9789103690685413, + "grad_norm": 0.6030983328819275, + "learning_rate": 3.553857916151315e-08, + "loss": 0.110443115234375, + "step": 14482 + }, + { + "epoch": 0.9789779640394755, + "grad_norm": 0.41356950998306274, + "learning_rate": 3.53119473389929e-08, + "loss": 0.061492919921875, + "step": 14483 + }, + { + "epoch": 0.9790455590104096, + "grad_norm": 0.4869946241378784, + "learning_rate": 3.508603960114487e-08, + "loss": 0.1013946533203125, + "step": 14484 + }, + { + "epoch": 0.9791131539813438, + "grad_norm": 0.7866855263710022, + "learning_rate": 3.4860855958901985e-08, + "loss": 0.145965576171875, + "step": 14485 + }, + { + "epoch": 0.9791807489522779, + "grad_norm": 0.3455051779747009, + "learning_rate": 3.4636396423155526e-08, + "loss": 0.06864166259765625, + "step": 14486 + }, + { + "epoch": 0.9792483439232121, + "grad_norm": 0.24203075468540192, + "learning_rate": 3.441266100477014e-08, + "loss": 0.024944305419921875, + "step": 14487 + }, + { + "epoch": 0.9793159388941463, + "grad_norm": 0.3499150276184082, + "learning_rate": 3.418964971456884e-08, + "loss": 0.0414276123046875, + "step": 14488 + }, + { + "epoch": 0.9793835338650805, + "grad_norm": 0.21609576046466827, + "learning_rate": 3.396736256334632e-08, + "loss": 0.02275848388671875, + "step": 14489 + }, + { + "epoch": 0.9794511288360146, + "grad_norm": 1.213655710220337, + "learning_rate": 3.374579956185231e-08, + "loss": 0.14500808715820312, + "step": 14490 + }, + { + "epoch": 0.9795187238069487, + "grad_norm": 0.6536402702331543, + "learning_rate": 3.352496072081324e-08, + "loss": 0.06952190399169922, + "step": 14491 + }, + { + "epoch": 0.9795863187778829, + "grad_norm": 0.6177904605865479, + "learning_rate": 3.3304846050910555e-08, + "loss": 0.1089019775390625, + "step": 14492 + }, + { + "epoch": 0.979653913748817, + "grad_norm": 1.0671823024749756, + "learning_rate": 3.308545556279741e-08, + "loss": 0.165618896484375, + "step": 14493 + }, + { + "epoch": 0.9797215087197513, + "grad_norm": 0.8132482767105103, + "learning_rate": 3.286678926708697e-08, + "loss": 0.10460710525512695, + "step": 14494 + }, + { + "epoch": 0.9797891036906854, + "grad_norm": 0.3561469614505768, + "learning_rate": 3.264884717436245e-08, + "loss": 0.053653717041015625, + "step": 14495 + }, + { + "epoch": 0.9798566986616196, + "grad_norm": 0.26759153604507446, + "learning_rate": 3.243162929516541e-08, + "loss": 0.04279327392578125, + "step": 14496 + }, + { + "epoch": 0.9799242936325537, + "grad_norm": 0.4555288255214691, + "learning_rate": 3.221513564001077e-08, + "loss": 0.079498291015625, + "step": 14497 + }, + { + "epoch": 0.9799918886034878, + "grad_norm": 1.1384588479995728, + "learning_rate": 3.199936621937016e-08, + "loss": 0.2060546875, + "step": 14498 + }, + { + "epoch": 0.9800594835744221, + "grad_norm": 0.9138811826705933, + "learning_rate": 3.178432104368523e-08, + "loss": 0.1682281494140625, + "step": 14499 + }, + { + "epoch": 0.9801270785453562, + "grad_norm": 0.40828442573547363, + "learning_rate": 3.1570000123362644e-08, + "loss": 0.05335235595703125, + "step": 14500 + }, + { + "epoch": 0.9801946735162904, + "grad_norm": 1.1215264797210693, + "learning_rate": 3.1356403468769115e-08, + "loss": 0.2022705078125, + "step": 14501 + }, + { + "epoch": 0.9802622684872245, + "grad_norm": 0.3848380446434021, + "learning_rate": 3.114353109024304e-08, + "loss": 0.03960990905761719, + "step": 14502 + }, + { + "epoch": 0.9803298634581588, + "grad_norm": 0.3490460515022278, + "learning_rate": 3.093138299808285e-08, + "loss": 0.036220550537109375, + "step": 14503 + }, + { + "epoch": 0.9803974584290929, + "grad_norm": 0.565904974937439, + "learning_rate": 3.071995920255366e-08, + "loss": 0.10624313354492188, + "step": 14504 + }, + { + "epoch": 0.980465053400027, + "grad_norm": 1.0948752164840698, + "learning_rate": 3.0509259713885627e-08, + "loss": 0.1854095458984375, + "step": 14505 + }, + { + "epoch": 0.9805326483709612, + "grad_norm": 0.29179540276527405, + "learning_rate": 3.02992845422756e-08, + "loss": 0.05048561096191406, + "step": 14506 + }, + { + "epoch": 0.9806002433418953, + "grad_norm": 1.5352506637573242, + "learning_rate": 3.009003369788044e-08, + "loss": 0.188507080078125, + "step": 14507 + }, + { + "epoch": 0.9806678383128296, + "grad_norm": 0.5672527551651001, + "learning_rate": 2.988150719082539e-08, + "loss": 0.0750885009765625, + "step": 14508 + }, + { + "epoch": 0.9807354332837637, + "grad_norm": 0.7134169340133667, + "learning_rate": 2.9673705031202368e-08, + "loss": 0.12816619873046875, + "step": 14509 + }, + { + "epoch": 0.9808030282546979, + "grad_norm": 0.5327911972999573, + "learning_rate": 2.9466627229065014e-08, + "loss": 0.1022491455078125, + "step": 14510 + }, + { + "epoch": 0.980870623225632, + "grad_norm": 0.8408723473548889, + "learning_rate": 2.9260273794433634e-08, + "loss": 0.1491851806640625, + "step": 14511 + }, + { + "epoch": 0.9809382181965661, + "grad_norm": 0.9402342438697815, + "learning_rate": 2.9054644737290247e-08, + "loss": 0.1598663330078125, + "step": 14512 + }, + { + "epoch": 0.9810058131675004, + "grad_norm": 0.7891754508018494, + "learning_rate": 2.884974006758856e-08, + "loss": 0.1287841796875, + "step": 14513 + }, + { + "epoch": 0.9810734081384345, + "grad_norm": 0.8150708675384521, + "learning_rate": 2.8645559795238973e-08, + "loss": 0.1595916748046875, + "step": 14514 + }, + { + "epoch": 0.9811410031093687, + "grad_norm": 0.39892539381980896, + "learning_rate": 2.8442103930123588e-08, + "loss": 0.082305908203125, + "step": 14515 + }, + { + "epoch": 0.9812085980803028, + "grad_norm": 0.3904649317264557, + "learning_rate": 2.8239372482087854e-08, + "loss": 0.0713043212890625, + "step": 14516 + }, + { + "epoch": 0.981276193051237, + "grad_norm": 0.4150339365005493, + "learning_rate": 2.8037365460938934e-08, + "loss": 0.068634033203125, + "step": 14517 + }, + { + "epoch": 0.9813437880221711, + "grad_norm": 0.7936696410179138, + "learning_rate": 2.7836082876450674e-08, + "loss": 0.1041412353515625, + "step": 14518 + }, + { + "epoch": 0.9814113829931053, + "grad_norm": 0.38089612126350403, + "learning_rate": 2.763552473836528e-08, + "loss": 0.048381805419921875, + "step": 14519 + }, + { + "epoch": 0.9814789779640395, + "grad_norm": 0.3590801954269409, + "learning_rate": 2.7435691056384994e-08, + "loss": 0.06219482421875, + "step": 14520 + }, + { + "epoch": 0.9815465729349736, + "grad_norm": 1.1726949214935303, + "learning_rate": 2.7236581840178743e-08, + "loss": 0.19451904296875, + "step": 14521 + }, + { + "epoch": 0.9816141679059078, + "grad_norm": 0.9287498593330383, + "learning_rate": 2.7038197099382154e-08, + "loss": 0.133331298828125, + "step": 14522 + }, + { + "epoch": 0.981681762876842, + "grad_norm": 1.3876049518585205, + "learning_rate": 2.6840536843592557e-08, + "loss": 0.1599407196044922, + "step": 14523 + }, + { + "epoch": 0.9817493578477762, + "grad_norm": 0.3454646170139313, + "learning_rate": 2.6643601082375624e-08, + "loss": 0.049556732177734375, + "step": 14524 + }, + { + "epoch": 0.9818169528187103, + "grad_norm": 1.0159761905670166, + "learning_rate": 2.6447389825258738e-08, + "loss": 0.1405181884765625, + "step": 14525 + }, + { + "epoch": 0.9818845477896444, + "grad_norm": 0.7537268996238708, + "learning_rate": 2.625190308173764e-08, + "loss": 0.1331787109375, + "step": 14526 + }, + { + "epoch": 0.9819521427605786, + "grad_norm": 0.7884060740470886, + "learning_rate": 2.605714086126809e-08, + "loss": 0.13072967529296875, + "step": 14527 + }, + { + "epoch": 0.9820197377315127, + "grad_norm": 1.0062415599822998, + "learning_rate": 2.5863103173277558e-08, + "loss": 0.1741485595703125, + "step": 14528 + }, + { + "epoch": 0.982087332702447, + "grad_norm": 0.5834217667579651, + "learning_rate": 2.5669790027153527e-08, + "loss": 0.113616943359375, + "step": 14529 + }, + { + "epoch": 0.9821549276733811, + "grad_norm": 0.5153977274894714, + "learning_rate": 2.547720143224852e-08, + "loss": 0.06859588623046875, + "step": 14530 + }, + { + "epoch": 0.9822225226443153, + "grad_norm": 0.751442551612854, + "learning_rate": 2.5285337397883413e-08, + "loss": 0.155120849609375, + "step": 14531 + }, + { + "epoch": 0.9822901176152494, + "grad_norm": 0.5431047677993774, + "learning_rate": 2.5094197933339112e-08, + "loss": 0.08192634582519531, + "step": 14532 + }, + { + "epoch": 0.9823577125861835, + "grad_norm": 1.131671667098999, + "learning_rate": 2.4903783047866556e-08, + "loss": 0.1312713623046875, + "step": 14533 + }, + { + "epoch": 0.9824253075571178, + "grad_norm": 1.118227243423462, + "learning_rate": 2.4714092750678375e-08, + "loss": 0.156768798828125, + "step": 14534 + }, + { + "epoch": 0.9824929025280519, + "grad_norm": 0.24818338453769684, + "learning_rate": 2.4525127050950557e-08, + "loss": 0.037937164306640625, + "step": 14535 + }, + { + "epoch": 0.9825604974989861, + "grad_norm": 1.5176692008972168, + "learning_rate": 2.433688595783079e-08, + "loss": 0.2000732421875, + "step": 14536 + }, + { + "epoch": 0.9826280924699202, + "grad_norm": 0.24968445301055908, + "learning_rate": 2.4149369480423454e-08, + "loss": 0.04309844970703125, + "step": 14537 + }, + { + "epoch": 0.9826956874408544, + "grad_norm": 0.5365039110183716, + "learning_rate": 2.3962577627804626e-08, + "loss": 0.09893035888671875, + "step": 14538 + }, + { + "epoch": 0.9827632824117886, + "grad_norm": 0.5681489109992981, + "learning_rate": 2.377651040901041e-08, + "loss": 0.0933380126953125, + "step": 14539 + }, + { + "epoch": 0.9828308773827227, + "grad_norm": 1.141746997833252, + "learning_rate": 2.359116783304527e-08, + "loss": 0.17840576171875, + "step": 14540 + }, + { + "epoch": 0.9828984723536569, + "grad_norm": 0.23404404520988464, + "learning_rate": 2.3406549908877027e-08, + "loss": 0.033847808837890625, + "step": 14541 + }, + { + "epoch": 0.982966067324591, + "grad_norm": 0.4249260723590851, + "learning_rate": 2.3222656645438545e-08, + "loss": 0.058040618896484375, + "step": 14542 + }, + { + "epoch": 0.9830336622955252, + "grad_norm": 1.13704514503479, + "learning_rate": 2.3039488051626035e-08, + "loss": 0.169830322265625, + "step": 14543 + }, + { + "epoch": 0.9831012572664594, + "grad_norm": 0.5070857405662537, + "learning_rate": 2.2857044136305738e-08, + "loss": 0.08496856689453125, + "step": 14544 + }, + { + "epoch": 0.9831688522373936, + "grad_norm": 1.6537736654281616, + "learning_rate": 2.2675324908302265e-08, + "loss": 0.16004180908203125, + "step": 14545 + }, + { + "epoch": 0.9832364472083277, + "grad_norm": 1.1337566375732422, + "learning_rate": 2.249433037640858e-08, + "loss": 0.2031707763671875, + "step": 14546 + }, + { + "epoch": 0.9833040421792618, + "grad_norm": 0.33539679646492004, + "learning_rate": 2.2314060549386006e-08, + "loss": 0.05419921875, + "step": 14547 + }, + { + "epoch": 0.983371637150196, + "grad_norm": 0.8672776818275452, + "learning_rate": 2.2134515435952575e-08, + "loss": 0.10578155517578125, + "step": 14548 + }, + { + "epoch": 0.9834392321211302, + "grad_norm": 0.4962286651134491, + "learning_rate": 2.195569504479633e-08, + "loss": 0.0556182861328125, + "step": 14549 + }, + { + "epoch": 0.9835068270920644, + "grad_norm": 1.4258729219436646, + "learning_rate": 2.1777599384573687e-08, + "loss": 0.1875, + "step": 14550 + }, + { + "epoch": 0.9835744220629985, + "grad_norm": 0.5172714591026306, + "learning_rate": 2.1600228463897752e-08, + "loss": 0.09490966796875, + "step": 14551 + }, + { + "epoch": 0.9836420170339327, + "grad_norm": 0.2205326110124588, + "learning_rate": 2.1423582291353327e-08, + "loss": 0.023000717163085938, + "step": 14552 + }, + { + "epoch": 0.9837096120048668, + "grad_norm": 0.26581689715385437, + "learning_rate": 2.124766087548691e-08, + "loss": 0.045894622802734375, + "step": 14553 + }, + { + "epoch": 0.983777206975801, + "grad_norm": 0.49963343143463135, + "learning_rate": 2.1072464224810016e-08, + "loss": 0.092681884765625, + "step": 14554 + }, + { + "epoch": 0.9838448019467352, + "grad_norm": 0.4033966064453125, + "learning_rate": 2.0897992347800877e-08, + "loss": 0.045078277587890625, + "step": 14555 + }, + { + "epoch": 0.9839123969176693, + "grad_norm": 1.3741531372070312, + "learning_rate": 2.0724245252899397e-08, + "loss": 0.1807875633239746, + "step": 14556 + }, + { + "epoch": 0.9839799918886035, + "grad_norm": 1.0627633333206177, + "learning_rate": 2.0551222948515526e-08, + "loss": 0.1723175048828125, + "step": 14557 + }, + { + "epoch": 0.9840475868595376, + "grad_norm": 1.5455539226531982, + "learning_rate": 2.0378925443019225e-08, + "loss": 0.238189697265625, + "step": 14558 + }, + { + "epoch": 0.9841151818304719, + "grad_norm": 0.7490437626838684, + "learning_rate": 2.0207352744747164e-08, + "loss": 0.1226043701171875, + "step": 14559 + }, + { + "epoch": 0.984182776801406, + "grad_norm": 0.6550219058990479, + "learning_rate": 2.0036504862002703e-08, + "loss": 0.098114013671875, + "step": 14560 + }, + { + "epoch": 0.9842503717723401, + "grad_norm": 0.4595043361186981, + "learning_rate": 1.9866381803050894e-08, + "loss": 0.058864593505859375, + "step": 14561 + }, + { + "epoch": 0.9843179667432743, + "grad_norm": 0.3654918372631073, + "learning_rate": 1.9696983576123484e-08, + "loss": 0.04345703125, + "step": 14562 + }, + { + "epoch": 0.9843855617142084, + "grad_norm": 1.8083120584487915, + "learning_rate": 1.9528310189418918e-08, + "loss": 0.26129150390625, + "step": 14563 + }, + { + "epoch": 0.9844531566851427, + "grad_norm": 0.9844211935997009, + "learning_rate": 1.936036165109567e-08, + "loss": 0.09552001953125, + "step": 14564 + }, + { + "epoch": 0.9845207516560768, + "grad_norm": 0.7712265253067017, + "learning_rate": 1.9193137969282237e-08, + "loss": 0.12039947509765625, + "step": 14565 + }, + { + "epoch": 0.984588346627011, + "grad_norm": 0.7424561381340027, + "learning_rate": 1.9026639152070478e-08, + "loss": 0.12767791748046875, + "step": 14566 + }, + { + "epoch": 0.9846559415979451, + "grad_norm": 1.9973992109298706, + "learning_rate": 1.8860865207513956e-08, + "loss": 0.2223663330078125, + "step": 14567 + }, + { + "epoch": 0.9847235365688792, + "grad_norm": 0.4294694662094116, + "learning_rate": 1.8695816143636246e-08, + "loss": 0.08461761474609375, + "step": 14568 + }, + { + "epoch": 0.9847911315398135, + "grad_norm": 0.7817615270614624, + "learning_rate": 1.8531491968422632e-08, + "loss": 0.169219970703125, + "step": 14569 + }, + { + "epoch": 0.9848587265107476, + "grad_norm": 0.8311428427696228, + "learning_rate": 1.8367892689825083e-08, + "loss": 0.13836669921875, + "step": 14570 + }, + { + "epoch": 0.9849263214816818, + "grad_norm": 0.3587202727794647, + "learning_rate": 1.8205018315757272e-08, + "loss": 0.059062957763671875, + "step": 14571 + }, + { + "epoch": 0.9849939164526159, + "grad_norm": 0.2914636731147766, + "learning_rate": 1.804286885410289e-08, + "loss": 0.049938201904296875, + "step": 14572 + }, + { + "epoch": 0.9850615114235501, + "grad_norm": 0.8085382580757141, + "learning_rate": 1.7881444312705664e-08, + "loss": 0.1375579833984375, + "step": 14573 + }, + { + "epoch": 0.9851291063944843, + "grad_norm": 0.19000835716724396, + "learning_rate": 1.772074469937601e-08, + "loss": 0.027935028076171875, + "step": 14574 + }, + { + "epoch": 0.9851967013654184, + "grad_norm": 0.35512080788612366, + "learning_rate": 1.7560770021891047e-08, + "loss": 0.043712615966796875, + "step": 14575 + }, + { + "epoch": 0.9852642963363526, + "grad_norm": 0.4429910182952881, + "learning_rate": 1.7401520287991247e-08, + "loss": 0.06826019287109375, + "step": 14576 + }, + { + "epoch": 0.9853318913072867, + "grad_norm": 0.5869206190109253, + "learning_rate": 1.7242995505382108e-08, + "loss": 0.07413482666015625, + "step": 14577 + }, + { + "epoch": 0.9853994862782209, + "grad_norm": 0.8784894347190857, + "learning_rate": 1.7085195681732502e-08, + "loss": 0.11975860595703125, + "step": 14578 + }, + { + "epoch": 0.985467081249155, + "grad_norm": 0.3157348930835724, + "learning_rate": 1.6928120824679648e-08, + "loss": 0.04366302490234375, + "step": 14579 + }, + { + "epoch": 0.9855346762200893, + "grad_norm": 0.5208009481430054, + "learning_rate": 1.6771770941822474e-08, + "loss": 0.07732772827148438, + "step": 14580 + }, + { + "epoch": 0.9856022711910234, + "grad_norm": 0.277156800031662, + "learning_rate": 1.6616146040728254e-08, + "loss": 0.04170989990234375, + "step": 14581 + }, + { + "epoch": 0.9856698661619575, + "grad_norm": 0.2966190278530121, + "learning_rate": 1.6461246128922636e-08, + "loss": 0.0578155517578125, + "step": 14582 + }, + { + "epoch": 0.9857374611328917, + "grad_norm": 1.0001578330993652, + "learning_rate": 1.6307071213906287e-08, + "loss": 0.08927297592163086, + "step": 14583 + }, + { + "epoch": 0.9858050561038258, + "grad_norm": 0.29464191198349, + "learning_rate": 1.6153621303134912e-08, + "loss": 0.03519439697265625, + "step": 14584 + }, + { + "epoch": 0.9858726510747601, + "grad_norm": 0.18440771102905273, + "learning_rate": 1.6000896404035902e-08, + "loss": 0.025682449340820312, + "step": 14585 + }, + { + "epoch": 0.9859402460456942, + "grad_norm": 0.9383023977279663, + "learning_rate": 1.5848896523996682e-08, + "loss": 0.1002655029296875, + "step": 14586 + }, + { + "epoch": 0.9860078410166284, + "grad_norm": 0.869674801826477, + "learning_rate": 1.5697621670374695e-08, + "loss": 0.1426849365234375, + "step": 14587 + }, + { + "epoch": 0.9860754359875625, + "grad_norm": 0.30498167872428894, + "learning_rate": 1.5547071850487425e-08, + "loss": 0.03897857666015625, + "step": 14588 + }, + { + "epoch": 0.9861430309584966, + "grad_norm": 0.3127249479293823, + "learning_rate": 1.5397247071620714e-08, + "loss": 0.05474853515625, + "step": 14589 + }, + { + "epoch": 0.9862106259294309, + "grad_norm": 0.8074806332588196, + "learning_rate": 1.524814734102209e-08, + "loss": 0.11999702453613281, + "step": 14590 + }, + { + "epoch": 0.986278220900365, + "grad_norm": 0.47379568219184875, + "learning_rate": 1.5099772665909116e-08, + "loss": 0.0951080322265625, + "step": 14591 + }, + { + "epoch": 0.9863458158712992, + "grad_norm": 0.46152958273887634, + "learning_rate": 1.4952123053457722e-08, + "loss": 0.07528305053710938, + "step": 14592 + }, + { + "epoch": 0.9864134108422333, + "grad_norm": 1.381513237953186, + "learning_rate": 1.4805198510813856e-08, + "loss": 0.180450439453125, + "step": 14593 + }, + { + "epoch": 0.9864810058131676, + "grad_norm": 1.507751226425171, + "learning_rate": 1.465899904508683e-08, + "loss": 0.142425537109375, + "step": 14594 + }, + { + "epoch": 0.9865486007841017, + "grad_norm": 0.40533554553985596, + "learning_rate": 1.4513524663350986e-08, + "loss": 0.07427215576171875, + "step": 14595 + }, + { + "epoch": 0.9866161957550358, + "grad_norm": 0.5800696015357971, + "learning_rate": 1.4368775372642362e-08, + "loss": 0.10010147094726562, + "step": 14596 + }, + { + "epoch": 0.98668379072597, + "grad_norm": 0.6089376211166382, + "learning_rate": 1.4224751179968687e-08, + "loss": 0.1180267333984375, + "step": 14597 + }, + { + "epoch": 0.9867513856969041, + "grad_norm": 0.7267046570777893, + "learning_rate": 1.408145209229772e-08, + "loss": 0.09144449234008789, + "step": 14598 + }, + { + "epoch": 0.9868189806678384, + "grad_norm": 0.399732381105423, + "learning_rate": 1.3938878116560583e-08, + "loss": 0.0611572265625, + "step": 14599 + }, + { + "epoch": 0.9868865756387725, + "grad_norm": 0.29953303933143616, + "learning_rate": 1.3797029259660088e-08, + "loss": 0.03775787353515625, + "step": 14600 + }, + { + "epoch": 0.9869541706097066, + "grad_norm": 0.45767635107040405, + "learning_rate": 1.3655905528455747e-08, + "loss": 0.084686279296875, + "step": 14601 + }, + { + "epoch": 0.9870217655806408, + "grad_norm": 0.43166953325271606, + "learning_rate": 1.3515506929778765e-08, + "loss": 0.06610870361328125, + "step": 14602 + }, + { + "epoch": 0.9870893605515749, + "grad_norm": 0.2639087736606598, + "learning_rate": 1.3375833470420374e-08, + "loss": 0.040805816650390625, + "step": 14603 + }, + { + "epoch": 0.9871569555225091, + "grad_norm": 0.37037941813468933, + "learning_rate": 1.3236885157140165e-08, + "loss": 0.0591888427734375, + "step": 14604 + }, + { + "epoch": 0.9872245504934433, + "grad_norm": 0.7694442868232727, + "learning_rate": 1.3098661996662765e-08, + "loss": 0.1136474609375, + "step": 14605 + }, + { + "epoch": 0.9872921454643775, + "grad_norm": 0.6982242465019226, + "learning_rate": 1.296116399567282e-08, + "loss": 0.10912704467773438, + "step": 14606 + }, + { + "epoch": 0.9873597404353116, + "grad_norm": 0.33121633529663086, + "learning_rate": 1.2824391160825011e-08, + "loss": 0.02701568603515625, + "step": 14607 + }, + { + "epoch": 0.9874273354062457, + "grad_norm": 0.4872824549674988, + "learning_rate": 1.268834349873904e-08, + "loss": 0.0712890625, + "step": 14608 + }, + { + "epoch": 0.98749493037718, + "grad_norm": 0.5975381731987, + "learning_rate": 1.2553021015994647e-08, + "loss": 0.11639404296875, + "step": 14609 + }, + { + "epoch": 0.9875625253481141, + "grad_norm": 0.6491500735282898, + "learning_rate": 1.2418423719141591e-08, + "loss": 0.11370086669921875, + "step": 14610 + }, + { + "epoch": 0.9876301203190483, + "grad_norm": 1.152923345565796, + "learning_rate": 1.2284551614692996e-08, + "loss": 0.16912841796875, + "step": 14611 + }, + { + "epoch": 0.9876977152899824, + "grad_norm": 0.6306784152984619, + "learning_rate": 1.2151404709123682e-08, + "loss": 0.10142135620117188, + "step": 14612 + }, + { + "epoch": 0.9877653102609166, + "grad_norm": 0.6253101825714111, + "learning_rate": 1.2018983008878493e-08, + "loss": 0.0958709716796875, + "step": 14613 + }, + { + "epoch": 0.9878329052318507, + "grad_norm": 0.28569266200065613, + "learning_rate": 1.1887286520363972e-08, + "loss": 0.0460052490234375, + "step": 14614 + }, + { + "epoch": 0.9879005002027849, + "grad_norm": 0.7083909511566162, + "learning_rate": 1.1756315249953354e-08, + "loss": 0.1119384765625, + "step": 14615 + }, + { + "epoch": 0.9879680951737191, + "grad_norm": 0.952385425567627, + "learning_rate": 1.1626069203983237e-08, + "loss": 0.13531494140625, + "step": 14616 + }, + { + "epoch": 0.9880356901446532, + "grad_norm": 0.31365448236465454, + "learning_rate": 1.149654838875358e-08, + "loss": 0.05727386474609375, + "step": 14617 + }, + { + "epoch": 0.9881032851155874, + "grad_norm": 0.3970549404621124, + "learning_rate": 1.1367752810536036e-08, + "loss": 0.0682830810546875, + "step": 14618 + }, + { + "epoch": 0.9881708800865215, + "grad_norm": 0.7491266131401062, + "learning_rate": 1.1239682475558955e-08, + "loss": 0.101654052734375, + "step": 14619 + }, + { + "epoch": 0.9882384750574558, + "grad_norm": 0.8308393955230713, + "learning_rate": 1.1112337390019046e-08, + "loss": 0.10729217529296875, + "step": 14620 + }, + { + "epoch": 0.9883060700283899, + "grad_norm": 0.18789628148078918, + "learning_rate": 1.098571756007971e-08, + "loss": 0.017232894897460938, + "step": 14621 + }, + { + "epoch": 0.988373664999324, + "grad_norm": 0.5104793310165405, + "learning_rate": 1.0859822991867719e-08, + "loss": 0.119659423828125, + "step": 14622 + }, + { + "epoch": 0.9884412599702582, + "grad_norm": 0.6427363157272339, + "learning_rate": 1.0734653691473196e-08, + "loss": 0.11529541015625, + "step": 14623 + }, + { + "epoch": 0.9885088549411923, + "grad_norm": 1.3183648586273193, + "learning_rate": 1.0610209664954628e-08, + "loss": 0.11071014404296875, + "step": 14624 + }, + { + "epoch": 0.9885764499121266, + "grad_norm": 0.6576656699180603, + "learning_rate": 1.0486490918330537e-08, + "loss": 0.1277008056640625, + "step": 14625 + }, + { + "epoch": 0.9886440448830607, + "grad_norm": 0.24945157766342163, + "learning_rate": 1.0363497457589465e-08, + "loss": 0.046295166015625, + "step": 14626 + }, + { + "epoch": 0.9887116398539949, + "grad_norm": 0.6033456325531006, + "learning_rate": 1.0241229288681652e-08, + "loss": 0.09228515625, + "step": 14627 + }, + { + "epoch": 0.988779234824929, + "grad_norm": 0.590742826461792, + "learning_rate": 1.0119686417524033e-08, + "loss": 0.11274337768554688, + "step": 14628 + }, + { + "epoch": 0.9888468297958631, + "grad_norm": 0.8653421998023987, + "learning_rate": 9.998868849996901e-09, + "loss": 0.1696014404296875, + "step": 14629 + }, + { + "epoch": 0.9889144247667974, + "grad_norm": 0.4132118821144104, + "learning_rate": 9.878776591945582e-09, + "loss": 0.05874776840209961, + "step": 14630 + }, + { + "epoch": 0.9889820197377315, + "grad_norm": 1.3728162050247192, + "learning_rate": 9.759409649182094e-09, + "loss": 0.1791229248046875, + "step": 14631 + }, + { + "epoch": 0.9890496147086657, + "grad_norm": 1.634772539138794, + "learning_rate": 9.640768027481816e-09, + "loss": 0.25018310546875, + "step": 14632 + }, + { + "epoch": 0.9891172096795998, + "grad_norm": 0.4686322808265686, + "learning_rate": 9.522851732585158e-09, + "loss": 0.08132171630859375, + "step": 14633 + }, + { + "epoch": 0.989184804650534, + "grad_norm": 0.9576737284660339, + "learning_rate": 9.40566077019922e-09, + "loss": 0.23583984375, + "step": 14634 + }, + { + "epoch": 0.9892523996214682, + "grad_norm": 0.9337003231048584, + "learning_rate": 9.289195145991137e-09, + "loss": 0.155487060546875, + "step": 14635 + }, + { + "epoch": 0.9893199945924023, + "grad_norm": 0.793224036693573, + "learning_rate": 9.173454865598063e-09, + "loss": 0.1419677734375, + "step": 14636 + }, + { + "epoch": 0.9893875895633365, + "grad_norm": 1.0413511991500854, + "learning_rate": 9.058439934620522e-09, + "loss": 0.234832763671875, + "step": 14637 + }, + { + "epoch": 0.9894551845342706, + "grad_norm": 0.5465304851531982, + "learning_rate": 8.944150358622393e-09, + "loss": 0.1038055419921875, + "step": 14638 + }, + { + "epoch": 0.9895227795052048, + "grad_norm": 0.3229949474334717, + "learning_rate": 8.83058614313592e-09, + "loss": 0.047946929931640625, + "step": 14639 + }, + { + "epoch": 0.989590374476139, + "grad_norm": 0.3782537877559662, + "learning_rate": 8.717747293655043e-09, + "loss": 0.059906005859375, + "step": 14640 + }, + { + "epoch": 0.9896579694470732, + "grad_norm": 0.7183416485786438, + "learning_rate": 8.605633815638725e-09, + "loss": 0.09200477600097656, + "step": 14641 + }, + { + "epoch": 0.9897255644180073, + "grad_norm": 0.5298599004745483, + "learning_rate": 8.494245714510962e-09, + "loss": 0.09714889526367188, + "step": 14642 + }, + { + "epoch": 0.9897931593889414, + "grad_norm": 1.0347576141357422, + "learning_rate": 8.383582995664108e-09, + "loss": 0.148651123046875, + "step": 14643 + }, + { + "epoch": 0.9898607543598756, + "grad_norm": 0.8581715822219849, + "learning_rate": 8.273645664452212e-09, + "loss": 0.14181900024414062, + "step": 14644 + }, + { + "epoch": 0.9899283493308098, + "grad_norm": 0.35348838567733765, + "learning_rate": 8.164433726192688e-09, + "loss": 0.0500335693359375, + "step": 14645 + }, + { + "epoch": 0.989995944301744, + "grad_norm": 1.2004717588424683, + "learning_rate": 8.055947186171308e-09, + "loss": 0.151763916015625, + "step": 14646 + }, + { + "epoch": 0.9900635392726781, + "grad_norm": 0.8461257815361023, + "learning_rate": 7.948186049637207e-09, + "loss": 0.1223297119140625, + "step": 14647 + }, + { + "epoch": 0.9901311342436123, + "grad_norm": 0.9005039930343628, + "learning_rate": 7.841150321804546e-09, + "loss": 0.144378662109375, + "step": 14648 + }, + { + "epoch": 0.9901987292145464, + "grad_norm": 0.3499279022216797, + "learning_rate": 7.734840007852518e-09, + "loss": 0.05138397216796875, + "step": 14649 + }, + { + "epoch": 0.9902663241854806, + "grad_norm": 1.5833823680877686, + "learning_rate": 7.629255112923672e-09, + "loss": 0.1269378662109375, + "step": 14650 + }, + { + "epoch": 0.9903339191564148, + "grad_norm": 0.6031163334846497, + "learning_rate": 7.524395642128923e-09, + "loss": 0.0998077392578125, + "step": 14651 + }, + { + "epoch": 0.9904015141273489, + "grad_norm": 0.35333168506622314, + "learning_rate": 7.420261600540879e-09, + "loss": 0.06209564208984375, + "step": 14652 + }, + { + "epoch": 0.9904691090982831, + "grad_norm": 0.1855117231607437, + "learning_rate": 7.316852993198841e-09, + "loss": 0.019578933715820312, + "step": 14653 + }, + { + "epoch": 0.9905367040692172, + "grad_norm": 0.26615598797798157, + "learning_rate": 7.214169825105476e-09, + "loss": 0.048450469970703125, + "step": 14654 + }, + { + "epoch": 0.9906042990401515, + "grad_norm": 1.9590694904327393, + "learning_rate": 7.1122121012301425e-09, + "loss": 0.12310791015625, + "step": 14655 + }, + { + "epoch": 0.9906718940110856, + "grad_norm": 0.36936312913894653, + "learning_rate": 7.010979826505559e-09, + "loss": 0.04500579833984375, + "step": 14656 + }, + { + "epoch": 0.9907394889820197, + "grad_norm": 0.5663021802902222, + "learning_rate": 6.9104730058294765e-09, + "loss": 0.1074981689453125, + "step": 14657 + }, + { + "epoch": 0.9908070839529539, + "grad_norm": 0.20033933222293854, + "learning_rate": 6.8106916440680015e-09, + "loss": 0.028369903564453125, + "step": 14658 + }, + { + "epoch": 0.990874678923888, + "grad_norm": 0.9545101523399353, + "learning_rate": 6.711635746043942e-09, + "loss": 0.14093017578125, + "step": 14659 + }, + { + "epoch": 0.9909422738948223, + "grad_norm": 1.0873782634735107, + "learning_rate": 6.613305316555129e-09, + "loss": 0.1051177978515625, + "step": 14660 + }, + { + "epoch": 0.9910098688657564, + "grad_norm": 0.22431987524032593, + "learning_rate": 6.515700360356092e-09, + "loss": 0.035251617431640625, + "step": 14661 + }, + { + "epoch": 0.9910774638366906, + "grad_norm": 0.4051605463027954, + "learning_rate": 6.418820882173049e-09, + "loss": 0.0460662841796875, + "step": 14662 + }, + { + "epoch": 0.9911450588076247, + "grad_norm": 0.8101682662963867, + "learning_rate": 6.322666886688921e-09, + "loss": 0.1350250244140625, + "step": 14663 + }, + { + "epoch": 0.9912126537785588, + "grad_norm": 0.9153363704681396, + "learning_rate": 6.227238378559985e-09, + "loss": 0.143768310546875, + "step": 14664 + }, + { + "epoch": 0.991280248749493, + "grad_norm": 0.7809767723083496, + "learning_rate": 6.132535362404212e-09, + "loss": 0.09622955322265625, + "step": 14665 + }, + { + "epoch": 0.9913478437204272, + "grad_norm": 0.7271924018859863, + "learning_rate": 6.038557842799608e-09, + "loss": 0.1035003662109375, + "step": 14666 + }, + { + "epoch": 0.9914154386913614, + "grad_norm": 0.7026301622390747, + "learning_rate": 5.945305824297531e-09, + "loss": 0.11800765991210938, + "step": 14667 + }, + { + "epoch": 0.9914830336622955, + "grad_norm": 0.2969331443309784, + "learning_rate": 5.852779311407708e-09, + "loss": 0.043590545654296875, + "step": 14668 + }, + { + "epoch": 0.9915506286332297, + "grad_norm": 0.8505642414093018, + "learning_rate": 5.760978308609888e-09, + "loss": 0.13050079345703125, + "step": 14669 + }, + { + "epoch": 0.9916182236041639, + "grad_norm": 0.5588565468788147, + "learning_rate": 5.669902820342188e-09, + "loss": 0.099945068359375, + "step": 14670 + }, + { + "epoch": 0.991685818575098, + "grad_norm": 0.36423927545547485, + "learning_rate": 5.579552851014414e-09, + "loss": 0.0717620849609375, + "step": 14671 + }, + { + "epoch": 0.9917534135460322, + "grad_norm": 1.0764336585998535, + "learning_rate": 5.489928404996403e-09, + "loss": 0.19805908203125, + "step": 14672 + }, + { + "epoch": 0.9918210085169663, + "grad_norm": 2.419586420059204, + "learning_rate": 5.4010294866246865e-09, + "loss": 0.2011566162109375, + "step": 14673 + }, + { + "epoch": 0.9918886034879005, + "grad_norm": 0.22875015437602997, + "learning_rate": 5.31285610020249e-09, + "loss": 0.039325714111328125, + "step": 14674 + }, + { + "epoch": 0.9919561984588346, + "grad_norm": 1.7052011489868164, + "learning_rate": 5.225408249996399e-09, + "loss": 0.1880645751953125, + "step": 14675 + }, + { + "epoch": 0.9920237934297689, + "grad_norm": 0.4327680170536041, + "learning_rate": 5.1386859402347e-09, + "loss": 0.06601715087890625, + "step": 14676 + }, + { + "epoch": 0.992091388400703, + "grad_norm": 0.8755927085876465, + "learning_rate": 5.052689175117364e-09, + "loss": 0.1298065185546875, + "step": 14677 + }, + { + "epoch": 0.9921589833716371, + "grad_norm": 0.24357210099697113, + "learning_rate": 4.9674179588010685e-09, + "loss": 0.030076980590820312, + "step": 14678 + }, + { + "epoch": 0.9922265783425713, + "grad_norm": 0.36396524310112, + "learning_rate": 4.882872295415841e-09, + "loss": 0.0600433349609375, + "step": 14679 + }, + { + "epoch": 0.9922941733135054, + "grad_norm": 0.6994645595550537, + "learning_rate": 4.799052189051745e-09, + "loss": 0.1097869873046875, + "step": 14680 + }, + { + "epoch": 0.9923617682844397, + "grad_norm": 0.8390215039253235, + "learning_rate": 4.715957643762203e-09, + "loss": 0.180999755859375, + "step": 14681 + }, + { + "epoch": 0.9924293632553738, + "grad_norm": 1.0853519439697266, + "learning_rate": 4.6335886635689995e-09, + "loss": 0.140625, + "step": 14682 + }, + { + "epoch": 0.992496958226308, + "grad_norm": 0.31555113196372986, + "learning_rate": 4.551945252458945e-09, + "loss": 0.05230712890625, + "step": 14683 + }, + { + "epoch": 0.9925645531972421, + "grad_norm": 1.1429344415664673, + "learning_rate": 4.471027414380546e-09, + "loss": 0.187408447265625, + "step": 14684 + }, + { + "epoch": 0.9926321481681762, + "grad_norm": 0.7349550724029541, + "learning_rate": 4.3908351532506715e-09, + "loss": 0.105560302734375, + "step": 14685 + }, + { + "epoch": 0.9926997431391105, + "grad_norm": 0.48555988073349, + "learning_rate": 4.311368472949551e-09, + "loss": 0.08837890625, + "step": 14686 + }, + { + "epoch": 0.9927673381100446, + "grad_norm": 0.3686596751213074, + "learning_rate": 4.232627377320775e-09, + "loss": 0.07280731201171875, + "step": 14687 + }, + { + "epoch": 0.9928349330809788, + "grad_norm": 2.965573787689209, + "learning_rate": 4.154611870174629e-09, + "loss": 0.20146942138671875, + "step": 14688 + }, + { + "epoch": 0.9929025280519129, + "grad_norm": 0.4837629795074463, + "learning_rate": 4.077321955288094e-09, + "loss": 0.06988525390625, + "step": 14689 + }, + { + "epoch": 0.9929701230228472, + "grad_norm": 0.4224412739276886, + "learning_rate": 4.000757636398178e-09, + "loss": 0.0770416259765625, + "step": 14690 + }, + { + "epoch": 0.9930377179937813, + "grad_norm": 0.3078501522541046, + "learning_rate": 3.924918917210251e-09, + "loss": 0.04736328125, + "step": 14691 + }, + { + "epoch": 0.9931053129647154, + "grad_norm": 0.8211241364479065, + "learning_rate": 3.849805801394712e-09, + "loss": 0.1651458740234375, + "step": 14692 + }, + { + "epoch": 0.9931729079356496, + "grad_norm": 1.2021554708480835, + "learning_rate": 3.775418292586985e-09, + "loss": 0.14768218994140625, + "step": 14693 + }, + { + "epoch": 0.9932405029065837, + "grad_norm": 0.19716456532478333, + "learning_rate": 3.7017563943841926e-09, + "loss": 0.031856536865234375, + "step": 14694 + }, + { + "epoch": 0.993308097877518, + "grad_norm": 0.4314277768135071, + "learning_rate": 3.6288201103501507e-09, + "loss": 0.09795379638671875, + "step": 14695 + }, + { + "epoch": 0.9933756928484521, + "grad_norm": 0.27136340737342834, + "learning_rate": 3.5566094440170337e-09, + "loss": 0.05176544189453125, + "step": 14696 + }, + { + "epoch": 0.9934432878193863, + "grad_norm": 0.253273606300354, + "learning_rate": 3.4851243988753832e-09, + "loss": 0.03287506103515625, + "step": 14697 + }, + { + "epoch": 0.9935108827903204, + "grad_norm": 0.2933524250984192, + "learning_rate": 3.4143649783874296e-09, + "loss": 0.0413970947265625, + "step": 14698 + }, + { + "epoch": 0.9935784777612545, + "grad_norm": 1.168432354927063, + "learning_rate": 3.3443311859737702e-09, + "loss": 0.14154434204101562, + "step": 14699 + }, + { + "epoch": 0.9936460727321887, + "grad_norm": 0.6105508208274841, + "learning_rate": 3.275023025026691e-09, + "loss": 0.09691619873046875, + "step": 14700 + }, + { + "epoch": 0.9937136677031229, + "grad_norm": 0.667350709438324, + "learning_rate": 3.20644049889518e-09, + "loss": 0.12706756591796875, + "step": 14701 + }, + { + "epoch": 0.9937812626740571, + "grad_norm": 0.6972696185112, + "learning_rate": 3.1385836109015796e-09, + "loss": 0.11043548583984375, + "step": 14702 + }, + { + "epoch": 0.9938488576449912, + "grad_norm": 0.32943105697631836, + "learning_rate": 3.071452364326599e-09, + "loss": 0.0672454833984375, + "step": 14703 + }, + { + "epoch": 0.9939164526159254, + "grad_norm": 0.23913222551345825, + "learning_rate": 3.005046762420971e-09, + "loss": 0.033107757568359375, + "step": 14704 + }, + { + "epoch": 0.9939840475868595, + "grad_norm": 0.7728555202484131, + "learning_rate": 2.9393668083954607e-09, + "loss": 0.14251708984375, + "step": 14705 + }, + { + "epoch": 0.9940516425577937, + "grad_norm": 1.2462348937988281, + "learning_rate": 2.8744125054308566e-09, + "loss": 0.19940185546875, + "step": 14706 + }, + { + "epoch": 0.9941192375287279, + "grad_norm": 1.1741843223571777, + "learning_rate": 2.810183856666315e-09, + "loss": 0.13805389404296875, + "step": 14707 + }, + { + "epoch": 0.994186832499662, + "grad_norm": 1.0208001136779785, + "learning_rate": 2.746680865212681e-09, + "loss": 0.1282958984375, + "step": 14708 + }, + { + "epoch": 0.9942544274705962, + "grad_norm": 0.3523133099079132, + "learning_rate": 2.68390353414083e-09, + "loss": 0.046085357666015625, + "step": 14709 + }, + { + "epoch": 0.9943220224415303, + "grad_norm": 0.7548127174377441, + "learning_rate": 2.621851866489999e-09, + "loss": 0.088409423828125, + "step": 14710 + }, + { + "epoch": 0.9943896174124646, + "grad_norm": 0.6368476152420044, + "learning_rate": 2.5605258652611208e-09, + "loss": 0.08896446228027344, + "step": 14711 + }, + { + "epoch": 0.9944572123833987, + "grad_norm": 1.1332422494888306, + "learning_rate": 2.4999255334218206e-09, + "loss": 0.13936996459960938, + "step": 14712 + }, + { + "epoch": 0.9945248073543328, + "grad_norm": 1.1459046602249146, + "learning_rate": 2.4400508739047532e-09, + "loss": 0.222076416015625, + "step": 14713 + }, + { + "epoch": 0.994592402325267, + "grad_norm": 1.668765902519226, + "learning_rate": 2.3809018896059356e-09, + "loss": 0.28717041015625, + "step": 14714 + }, + { + "epoch": 0.9946599972962011, + "grad_norm": 0.5763518214225769, + "learning_rate": 2.3224785833897422e-09, + "loss": 0.1042633056640625, + "step": 14715 + }, + { + "epoch": 0.9947275922671354, + "grad_norm": 0.7709938287734985, + "learning_rate": 2.264780958080581e-09, + "loss": 0.10944747924804688, + "step": 14716 + }, + { + "epoch": 0.9947951872380695, + "grad_norm": 0.8146252036094666, + "learning_rate": 2.2078090164728835e-09, + "loss": 0.1478271484375, + "step": 14717 + }, + { + "epoch": 0.9948627822090037, + "grad_norm": 0.2258438766002655, + "learning_rate": 2.1515627613194476e-09, + "loss": 0.029308319091796875, + "step": 14718 + }, + { + "epoch": 0.9949303771799378, + "grad_norm": 0.6188374161720276, + "learning_rate": 2.096042195346426e-09, + "loss": 0.09867095947265625, + "step": 14719 + }, + { + "epoch": 0.9949979721508719, + "grad_norm": 1.6724932193756104, + "learning_rate": 2.041247321235007e-09, + "loss": 0.11174774169921875, + "step": 14720 + }, + { + "epoch": 0.9950655671218062, + "grad_norm": 0.6790620684623718, + "learning_rate": 1.987178141641399e-09, + "loss": 0.126953125, + "step": 14721 + }, + { + "epoch": 0.9951331620927403, + "grad_norm": 0.31797105073928833, + "learning_rate": 1.9338346591785127e-09, + "loss": 0.057586669921875, + "step": 14722 + }, + { + "epoch": 0.9952007570636745, + "grad_norm": 0.3120309114456177, + "learning_rate": 1.881216876430947e-09, + "loss": 0.038308143615722656, + "step": 14723 + }, + { + "epoch": 0.9952683520346086, + "grad_norm": 0.9500080943107605, + "learning_rate": 1.8293247959400016e-09, + "loss": 0.179840087890625, + "step": 14724 + }, + { + "epoch": 0.9953359470055428, + "grad_norm": 0.3414144814014435, + "learning_rate": 1.7781584202203328e-09, + "loss": 0.0618896484375, + "step": 14725 + }, + { + "epoch": 0.995403541976477, + "grad_norm": 1.174798846244812, + "learning_rate": 1.7277177517449615e-09, + "loss": 0.1454315185546875, + "step": 14726 + }, + { + "epoch": 0.9954711369474111, + "grad_norm": 0.3233250677585602, + "learning_rate": 1.6780027929569341e-09, + "loss": 0.06601715087890625, + "step": 14727 + }, + { + "epoch": 0.9955387319183453, + "grad_norm": 0.3378120958805084, + "learning_rate": 1.6290135462609933e-09, + "loss": 0.062404632568359375, + "step": 14728 + }, + { + "epoch": 0.9956063268892794, + "grad_norm": 0.4917137920856476, + "learning_rate": 1.5807500140252451e-09, + "loss": 0.079742431640625, + "step": 14729 + }, + { + "epoch": 0.9956739218602136, + "grad_norm": 0.6531800031661987, + "learning_rate": 1.5332121985894842e-09, + "loss": 0.132965087890625, + "step": 14730 + }, + { + "epoch": 0.9957415168311478, + "grad_norm": 0.6808592677116394, + "learning_rate": 1.4864001022502071e-09, + "loss": 0.1318206787109375, + "step": 14731 + }, + { + "epoch": 0.9958091118020819, + "grad_norm": 0.4887646734714508, + "learning_rate": 1.440313727273934e-09, + "loss": 0.08913803100585938, + "step": 14732 + }, + { + "epoch": 0.9958767067730161, + "grad_norm": 0.45201149582862854, + "learning_rate": 1.3949530758888829e-09, + "loss": 0.075958251953125, + "step": 14733 + }, + { + "epoch": 0.9959443017439502, + "grad_norm": 0.4909307658672333, + "learning_rate": 1.3503181502932949e-09, + "loss": 0.0870513916015625, + "step": 14734 + }, + { + "epoch": 0.9960118967148844, + "grad_norm": 1.3133232593536377, + "learning_rate": 1.3064089526437783e-09, + "loss": 0.176483154296875, + "step": 14735 + }, + { + "epoch": 0.9960794916858186, + "grad_norm": 0.514555811882019, + "learning_rate": 1.2632254850669655e-09, + "loss": 0.06395721435546875, + "step": 14736 + }, + { + "epoch": 0.9961470866567528, + "grad_norm": 1.2625707387924194, + "learning_rate": 1.220767749651186e-09, + "loss": 0.115203857421875, + "step": 14737 + }, + { + "epoch": 0.9962146816276869, + "grad_norm": 0.45631787180900574, + "learning_rate": 1.1790357484497971e-09, + "loss": 0.06722640991210938, + "step": 14738 + }, + { + "epoch": 0.996282276598621, + "grad_norm": 0.42228108644485474, + "learning_rate": 1.138029483484515e-09, + "loss": 0.0843353271484375, + "step": 14739 + }, + { + "epoch": 0.9963498715695552, + "grad_norm": 0.474334180355072, + "learning_rate": 1.0977489567387534e-09, + "loss": 0.0990753173828125, + "step": 14740 + }, + { + "epoch": 0.9964174665404893, + "grad_norm": 0.34860727190971375, + "learning_rate": 1.0581941701609532e-09, + "loss": 0.06658935546875, + "step": 14741 + }, + { + "epoch": 0.9964850615114236, + "grad_norm": 0.337224543094635, + "learning_rate": 1.0193651256645843e-09, + "loss": 0.061187744140625, + "step": 14742 + }, + { + "epoch": 0.9965526564823577, + "grad_norm": 0.6155827641487122, + "learning_rate": 9.812618251281435e-10, + "loss": 0.1113128662109375, + "step": 14743 + }, + { + "epoch": 0.9966202514532919, + "grad_norm": 0.392333060503006, + "learning_rate": 9.43884270396822e-10, + "loss": 0.05596160888671875, + "step": 14744 + }, + { + "epoch": 0.996687846424226, + "grad_norm": 1.0189929008483887, + "learning_rate": 9.072324632791728e-10, + "loss": 0.16487884521484375, + "step": 14745 + }, + { + "epoch": 0.9967554413951601, + "grad_norm": 0.9851943254470825, + "learning_rate": 8.71306405547112e-10, + "loss": 0.1691436767578125, + "step": 14746 + }, + { + "epoch": 0.9968230363660944, + "grad_norm": 1.3824543952941895, + "learning_rate": 8.361060989409142e-10, + "loss": 0.16786956787109375, + "step": 14747 + }, + { + "epoch": 0.9968906313370285, + "grad_norm": 0.32783082127571106, + "learning_rate": 8.016315451608858e-10, + "loss": 0.04560089111328125, + "step": 14748 + }, + { + "epoch": 0.9969582263079627, + "grad_norm": 0.3024153709411621, + "learning_rate": 7.678827458790228e-10, + "loss": 0.0589752197265625, + "step": 14749 + }, + { + "epoch": 0.9970258212788968, + "grad_norm": 0.3420148491859436, + "learning_rate": 7.348597027256876e-10, + "loss": 0.04562187194824219, + "step": 14750 + }, + { + "epoch": 0.997093416249831, + "grad_norm": 0.4225155711174011, + "learning_rate": 7.025624172979362e-10, + "loss": 0.076202392578125, + "step": 14751 + }, + { + "epoch": 0.9971610112207652, + "grad_norm": 0.7011803984642029, + "learning_rate": 6.709908911611828e-10, + "loss": 0.10650634765625, + "step": 14752 + }, + { + "epoch": 0.9972286061916993, + "grad_norm": 0.4448903203010559, + "learning_rate": 6.401451258408742e-10, + "loss": 0.06974220275878906, + "step": 14753 + }, + { + "epoch": 0.9972962011626335, + "grad_norm": 0.3440297245979309, + "learning_rate": 6.100251228308151e-10, + "loss": 0.05928230285644531, + "step": 14754 + }, + { + "epoch": 0.9973637961335676, + "grad_norm": 0.6300401091575623, + "learning_rate": 5.806308835881735e-10, + "loss": 0.1352081298828125, + "step": 14755 + }, + { + "epoch": 0.9974313911045019, + "grad_norm": 0.37462449073791504, + "learning_rate": 5.51962409535145e-10, + "loss": 0.078643798828125, + "step": 14756 + }, + { + "epoch": 0.997498986075436, + "grad_norm": 0.7743452787399292, + "learning_rate": 5.240197020572879e-10, + "loss": 0.111083984375, + "step": 14757 + }, + { + "epoch": 0.9975665810463702, + "grad_norm": 0.48821118474006653, + "learning_rate": 4.968027625101845e-10, + "loss": 0.10102081298828125, + "step": 14758 + }, + { + "epoch": 0.9976341760173043, + "grad_norm": 1.2484238147735596, + "learning_rate": 4.703115922077839e-10, + "loss": 0.1835784912109375, + "step": 14759 + }, + { + "epoch": 0.9977017709882384, + "grad_norm": 0.3905677795410156, + "learning_rate": 4.4454619243239347e-10, + "loss": 0.06839942932128906, + "step": 14760 + }, + { + "epoch": 0.9977693659591726, + "grad_norm": 0.5603174567222595, + "learning_rate": 4.1950656443134893e-10, + "loss": 0.0892333984375, + "step": 14761 + }, + { + "epoch": 0.9978369609301068, + "grad_norm": 0.8760934472084045, + "learning_rate": 3.9519270941534847e-10, + "loss": 0.141815185546875, + "step": 14762 + }, + { + "epoch": 0.997904555901041, + "grad_norm": 1.0060248374938965, + "learning_rate": 3.716046285617836e-10, + "loss": 0.12679290771484375, + "step": 14763 + }, + { + "epoch": 0.9979721508719751, + "grad_norm": 0.9462021589279175, + "learning_rate": 3.4874232301307375e-10, + "loss": 0.16680908203125, + "step": 14764 + }, + { + "epoch": 0.9980397458429093, + "grad_norm": 1.2865303754806519, + "learning_rate": 3.2660579387333576e-10, + "loss": 0.1523590087890625, + "step": 14765 + }, + { + "epoch": 0.9981073408138434, + "grad_norm": 0.4516562223434448, + "learning_rate": 3.051950422133798e-10, + "loss": 0.06139373779296875, + "step": 14766 + }, + { + "epoch": 0.9981749357847776, + "grad_norm": 1.0522934198379517, + "learning_rate": 2.845100690707092e-10, + "loss": 0.1345062255859375, + "step": 14767 + }, + { + "epoch": 0.9982425307557118, + "grad_norm": 0.6958136558532715, + "learning_rate": 2.6455087544619006e-10, + "loss": 0.136993408203125, + "step": 14768 + }, + { + "epoch": 0.9983101257266459, + "grad_norm": 0.8680720329284668, + "learning_rate": 2.4531746230405105e-10, + "loss": 0.1439666748046875, + "step": 14769 + }, + { + "epoch": 0.9983777206975801, + "grad_norm": 1.0892795324325562, + "learning_rate": 2.2680983057687954e-10, + "loss": 0.1389007568359375, + "step": 14770 + }, + { + "epoch": 0.9984453156685142, + "grad_norm": 0.8681121468544006, + "learning_rate": 2.0902798115896017e-10, + "loss": 0.139739990234375, + "step": 14771 + }, + { + "epoch": 0.9985129106394485, + "grad_norm": 0.32652899622917175, + "learning_rate": 1.9197191490960553e-10, + "loss": 0.04676055908203125, + "step": 14772 + }, + { + "epoch": 0.9985805056103826, + "grad_norm": 0.37426310777664185, + "learning_rate": 1.7564163265815226e-10, + "loss": 0.0553741455078125, + "step": 14773 + }, + { + "epoch": 0.9986481005813167, + "grad_norm": 0.4890690743923187, + "learning_rate": 1.6003713519063823e-10, + "loss": 0.0955657958984375, + "step": 14774 + }, + { + "epoch": 0.9987156955522509, + "grad_norm": 0.444466769695282, + "learning_rate": 1.4515842326312535e-10, + "loss": 0.09188461303710938, + "step": 14775 + }, + { + "epoch": 0.998783290523185, + "grad_norm": 0.4748975932598114, + "learning_rate": 1.3100549759670343e-10, + "loss": 0.082122802734375, + "step": 14776 + }, + { + "epoch": 0.9988508854941193, + "grad_norm": 0.49940025806427, + "learning_rate": 1.1757835887415968e-10, + "loss": 0.08492469787597656, + "step": 14777 + }, + { + "epoch": 0.9989184804650534, + "grad_norm": 1.8313307762145996, + "learning_rate": 1.0487700774663989e-10, + "loss": 0.1944580078125, + "step": 14778 + }, + { + "epoch": 0.9989860754359876, + "grad_norm": 0.8672401905059814, + "learning_rate": 9.290144483031781e-11, + "loss": 0.116851806640625, + "step": 14779 + }, + { + "epoch": 0.9990536704069217, + "grad_norm": 0.6243359446525574, + "learning_rate": 8.165167070139922e-11, + "loss": 0.07525634765625, + "step": 14780 + }, + { + "epoch": 0.9991212653778558, + "grad_norm": 1.8103269338607788, + "learning_rate": 7.112768590611385e-11, + "loss": 0.1277313232421875, + "step": 14781 + }, + { + "epoch": 0.9991888603487901, + "grad_norm": 0.3688494563102722, + "learning_rate": 6.13294909523887e-11, + "loss": 0.04730987548828125, + "step": 14782 + }, + { + "epoch": 0.9992564553197242, + "grad_norm": 0.31809577345848083, + "learning_rate": 5.2257086316509495e-11, + "loss": 0.04525566101074219, + "step": 14783 + }, + { + "epoch": 0.9993240502906584, + "grad_norm": 1.9819376468658447, + "learning_rate": 4.391047243479385e-11, + "loss": 0.221160888671875, + "step": 14784 + }, + { + "epoch": 0.9993916452615925, + "grad_norm": 0.8883200883865356, + "learning_rate": 3.62896497135834e-11, + "loss": 0.1556396484375, + "step": 14785 + }, + { + "epoch": 0.9994592402325267, + "grad_norm": 0.9914294481277466, + "learning_rate": 2.939461851925174e-11, + "loss": 0.1574554443359375, + "step": 14786 + }, + { + "epoch": 0.9995268352034609, + "grad_norm": 0.33776789903640747, + "learning_rate": 2.322537918819645e-11, + "loss": 0.06160736083984375, + "step": 14787 + }, + { + "epoch": 0.999594430174395, + "grad_norm": 1.0510004758834839, + "learning_rate": 1.778193201684708e-11, + "loss": 0.151763916015625, + "step": 14788 + }, + { + "epoch": 0.9996620251453292, + "grad_norm": 0.3481443524360657, + "learning_rate": 1.3064277268326486e-11, + "loss": 0.05579376220703125, + "step": 14789 + }, + { + "epoch": 0.9997296201162633, + "grad_norm": 1.8471194505691528, + "learning_rate": 9.072415172450831e-12, + "loss": 0.25164794921875, + "step": 14790 + }, + { + "epoch": 0.9997972150871975, + "grad_norm": 1.0375932455062866, + "learning_rate": 5.806345920733591e-12, + "loss": 0.094573974609375, + "step": 14791 + }, + { + "epoch": 0.9998648100581317, + "grad_norm": 0.6337073445320129, + "learning_rate": 3.2660696730468786e-12, + "loss": 0.111846923828125, + "step": 14792 + }, + { + "epoch": 0.9999324050290659, + "grad_norm": 1.941968321800232, + "learning_rate": 1.4515865509601157e-12, + "loss": 0.233367919921875, + "step": 14793 + }, + { + "epoch": 1.0, + "grad_norm": 0.9152747988700867, + "learning_rate": 3.628966427360325e-13, + "loss": 0.1481781005859375, + "step": 14794 + }, + { + "epoch": 1.0, + "step": 14794, + "total_flos": 8.369212778222295e+19, + "train_loss": 0.05663900421618835, + "train_runtime": 58243.7214, + "train_samples_per_second": 32.511, + "train_steps_per_second": 0.254 + } + ], + "logging_steps": 1.0, + "max_steps": 14794, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 8.369212778222295e+19, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}